Merge tag 'linux-cpupower-6.6-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux
author Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tue, 29 Aug 2023 18:33:27 +0000 (20:33 +0200)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tue, 29 Aug 2023 18:33:27 +0000 (20:33 +0200)
Merge additional cpupower utility update for 6.6 from Shuah Khan:

"This cpupower update [...] consists of a single fix to add Georgian
 translation to Makefile LANGUAGES."

* tag 'linux-cpupower-6.6-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux:
  cpupower: Add Georgian translation to Makefile LANGUAGES
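For context, the fix is a one-line Makefile change: cpupower's Makefile carries a LANGUAGES list that drives which gettext translations get built and installed, and the Georgian locale code ("ka") is added to that list. A sketch of the change's general shape follows (the exact path and the pre-existing language list are assumptions, not quoted from this page):

    --- a/tools/power/cpupower/Makefile
    +++ b/tools/power/cpupower/Makefile
    # LANGUAGES names the .po translation catalogs built under po/
    -LANGUAGES = de fr it cs pt
    +LANGUAGES = de fr it cs pt ka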

2795 files changed:
.mailmap
Documentation/ABI/testing/sysfs-bus-cxl
Documentation/ABI/testing/sysfs-class-led-trigger-netdev
Documentation/ABI/testing/sysfs-devices-system-cpu
Documentation/ABI/testing/sysfs-driver-chromeos-acpi
Documentation/ABI/testing/sysfs-driver-ufs
Documentation/ABI/testing/sysfs-module
Documentation/ABI/testing/sysfs-platform-hidma
Documentation/ABI/testing/sysfs-platform-hidma-mgmt
Documentation/RCU/lockdep-splat.rst
Documentation/RCU/rculist_nulls.rst
Documentation/admin-guide/devices.txt
Documentation/admin-guide/hw-vuln/gather_data_sampling.rst [new file with mode: 0644]
Documentation/admin-guide/hw-vuln/index.rst
Documentation/admin-guide/hw-vuln/spectre.rst
Documentation/admin-guide/hw-vuln/srso.rst [new file with mode: 0644]
Documentation/admin-guide/kdump/vmcoreinfo.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/arch/arm64/silicon-errata.rst
Documentation/arch/arm64/sme.rst
Documentation/arch/index.rst
Documentation/arch/s390/3270.ChangeLog [moved from Documentation/s390/3270.ChangeLog with 100% similarity]
Documentation/arch/s390/3270.rst [moved from Documentation/s390/3270.rst with 99% similarity]
Documentation/arch/s390/cds.rst [moved from Documentation/s390/cds.rst with 99% similarity]
Documentation/arch/s390/common_io.rst [moved from Documentation/s390/common_io.rst with 98% similarity]
Documentation/arch/s390/config3270.sh [moved from Documentation/s390/config3270.sh with 100% similarity]
Documentation/arch/s390/driver-model.rst [moved from Documentation/s390/driver-model.rst with 100% similarity]
Documentation/arch/s390/features.rst [moved from Documentation/s390/features.rst with 100% similarity]
Documentation/arch/s390/index.rst [moved from Documentation/s390/index.rst with 100% similarity]
Documentation/arch/s390/monreader.rst [moved from Documentation/s390/monreader.rst with 100% similarity]
Documentation/arch/s390/pci.rst [moved from Documentation/s390/pci.rst with 99% similarity]
Documentation/arch/s390/qeth.rst [moved from Documentation/s390/qeth.rst with 100% similarity]
Documentation/arch/s390/s390dbf.rst [moved from Documentation/s390/s390dbf.rst with 100% similarity]
Documentation/arch/s390/text_files.rst [moved from Documentation/s390/text_files.rst with 100% similarity]
Documentation/arch/s390/vfio-ap-locking.rst [moved from Documentation/s390/vfio-ap-locking.rst with 100% similarity]
Documentation/arch/s390/vfio-ap.rst [moved from Documentation/s390/vfio-ap.rst with 100% similarity]
Documentation/arch/s390/vfio-ccw.rst [moved from Documentation/s390/vfio-ccw.rst with 99% similarity]
Documentation/arch/s390/zfcpdump.rst [moved from Documentation/s390/zfcpdump.rst with 100% similarity]
Documentation/arch/x86/boot.rst
Documentation/core-api/cpu_hotplug.rst
Documentation/devicetree/bindings/arm/pmu.yaml
Documentation/devicetree/bindings/cpufreq/cpufreq-qcom-hw.yaml
Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt [deleted file]
Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
Documentation/devicetree/bindings/interrupt-controller/amlogic,meson-gpio-intc.yaml
Documentation/devicetree/bindings/net/mediatek,net.yaml
Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/opp/opp-v2-base.yaml
Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt [deleted file]
Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml
Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
Documentation/devicetree/bindings/serial/cavium-uart.txt [deleted file]
Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt [deleted file]
Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml
Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
Documentation/devicetree/bindings/watchdog/loongson,ls1x-wdt.yaml [new file with mode: 0644]
Documentation/driver-api/s390-drivers.rst
Documentation/filesystems/fscrypt.rst
Documentation/filesystems/idmappings.rst
Documentation/filesystems/locking.rst
Documentation/filesystems/porting.rst
Documentation/filesystems/tmpfs.rst
Documentation/filesystems/vfs.rst
Documentation/firmware-guide/acpi/chromeos-acpi-device.rst
Documentation/i2c/writing-clients.rst
Documentation/networking/napi.rst
Documentation/networking/nf_conntrack-sysctl.rst
Documentation/process/embargoed-hardware-issues.rst
Documentation/process/maintainer-netdev.rst
Documentation/process/security-bugs.rst
Documentation/riscv/hwprobe.rst
Documentation/scheduler/sched-design-CFS.rst
Documentation/wmi/devices/dell-wmi-ddv.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/include/asm/processor.h
arch/alpha/kernel/osf_sys.c
arch/alpha/kernel/setup.c
arch/alpha/kernel/syscalls/syscall.tbl
arch/arm/boot/dts/arm/integratorap.dts
arch/arm/boot/dts/microchip/sam9x60.dtsi
arch/arm/boot/dts/nspire/nspire.dtsi
arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts
arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi
arch/arm/boot/dts/nxp/imx/imx6sll.dtsi
arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
arch/arm/boot/dts/nxp/imx/imx7s.dtsi
arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
arch/arm/configs/axm55xx_defconfig
arch/arm/configs/davinci_all_defconfig
arch/arm/configs/exynos_defconfig
arch/arm/configs/footbridge_defconfig
arch/arm/configs/imx_v6_v7_defconfig
arch/arm/configs/keystone_defconfig
arch/arm/configs/lpc32xx_defconfig
arch/arm/configs/milbeaut_m10v_defconfig
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap1_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/configs/pxa_defconfig
arch/arm/configs/rpc_defconfig
arch/arm/configs/s5pv210_defconfig
arch/arm/configs/socfpga_defconfig
arch/arm/configs/spear13xx_defconfig
arch/arm/configs/spear3xx_defconfig
arch/arm/configs/spear6xx_defconfig
arch/arm/include/asm/syscall.h
arch/arm/kernel/entry-common.S
arch/arm/kernel/hw_breakpoint.c
arch/arm/kernel/ptrace.c
arch/arm/mach-pxa/sharpsl_pm.h
arch/arm/mach-pxa/spitz_pm.c
arch/arm/mach-zynq/pm.c
arch/arm/tools/syscall.tbl
arch/arm64/Kconfig
arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi [deleted symlink]
arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
arch/arm64/boot/dts/freescale/imx8mm.dtsi
arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
arch/arm64/boot/dts/freescale/imx8mn.dtsi
arch/arm64/boot/dts/freescale/imx8mq.dtsi
arch/arm64/boot/dts/freescale/imx93.dtsi
arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
arch/arm64/boot/dts/qcom/sa8775p-ride.dts
arch/arm64/boot/dts/qcom/sc7180.dtsi
arch/arm64/boot/dts/qcom/sc8180x.dtsi
arch/arm64/boot/dts/qcom/sm8150.dtsi
arch/arm64/boot/dts/qcom/sm8250.dtsi
arch/arm64/boot/dts/qcom/sm8350.dtsi
arch/arm64/boot/dts/renesas/r9a07g044.dtsi
arch/arm64/boot/dts/renesas/r9a07g054.dtsi
arch/arm64/boot/dts/rockchip/px30.dtsi
arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts
arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts
arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts
arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi
arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts
arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts
arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts
arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi
arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi
arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts
arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi
arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
arch/arm64/configs/defconfig
arch/arm64/include/asm/acpi.h
arch/arm64/include/asm/efi.h
arch/arm64/include/asm/el2_setup.h
arch/arm64/include/asm/fpsimd.h
arch/arm64/include/asm/ftrace.h
arch/arm64/include/asm/hwcap.h
arch/arm64/include/asm/kernel-pgtable.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_emulate.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/include/asm/mmu.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sdei.h
arch/arm64/include/asm/syscall.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/unistd.h
arch/arm64/include/asm/unistd32.h
arch/arm64/include/asm/virt.h
arch/arm64/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
arch/arm64/include/uapi/asm/hwcap.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuidle.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/efi.c
arch/arm64/kernel/entry-common.c
arch/arm64/kernel/entry.S
arch/arm64/kernel/fpsimd.c
arch/arm64/kernel/head.S
arch/arm64/kernel/hw_breakpoint.c
arch/arm64/kernel/idreg-override.c
arch/arm64/kernel/pci.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/sdei.c
arch/arm64/kernel/signal.c
arch/arm64/kernel/smp.c
arch/arm64/kernel/syscall.c
arch/arm64/kernel/vdso/vdso.lds.S
arch/arm64/kernel/vdso/vgettimeofday.c
arch/arm64/kvm/arch_timer.c
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/nvhe/Makefile
arch/arm64/kvm/hyp/nvhe/ffa.c
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/hyp/nvhe/list_debug.c
arch/arm64/kvm/hyp/nvhe/psci-relay.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/pkvm.c
arch/arm64/kvm/sys_regs.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/arm64/kvm/vgic/vgic-v4.c
arch/arm64/mm/init.c
arch/arm64/mm/proc.S
arch/arm64/mm/trans_pgd.c
arch/arm64/net/bpf_jit_comp.c
arch/arm64/tools/sysreg
arch/ia64/configs/bigsur_defconfig
arch/ia64/configs/generic_defconfig
arch/ia64/configs/gensparse_defconfig
arch/ia64/configs/tiger_defconfig
arch/ia64/include/asm/acpi.h
arch/ia64/include/asm/processor.h
arch/ia64/kernel/sys_ia64.c
arch/ia64/kernel/syscalls/syscall.tbl
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/Kbuild
arch/loongarch/include/asm/fpu.h
arch/loongarch/include/asm/local.h
arch/loongarch/include/asm/ptrace.h
arch/loongarch/include/asm/smp.h
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/hw_breakpoint.c
arch/loongarch/kernel/mcount.S
arch/loongarch/kernel/mcount_dyn.S
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/smp.c
arch/loongarch/kernel/traps.c
arch/loongarch/lib/clear_user.S
arch/loongarch/lib/copy_user.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/lib/unaligned.S
arch/loongarch/mm/page.S
arch/loongarch/mm/tlbex.S
arch/loongarch/net/bpf_jit.h
arch/m68k/configs/amiga_defconfig
arch/m68k/configs/apollo_defconfig
arch/m68k/configs/atari_defconfig
arch/m68k/configs/bvme6000_defconfig
arch/m68k/configs/hp300_defconfig
arch/m68k/configs/mac_defconfig
arch/m68k/configs/multi_defconfig
arch/m68k/configs/mvme147_defconfig
arch/m68k/configs/mvme16x_defconfig
arch/m68k/configs/q40_defconfig
arch/m68k/configs/sun3_defconfig
arch/m68k/configs/sun3x_defconfig
arch/m68k/fpsp040/skeleton.S
arch/m68k/ifpsp060/os.S
arch/m68k/include/asm/Kbuild
arch/m68k/include/asm/div64.h
arch/m68k/include/asm/string.h
arch/m68k/kernel/relocate_kernel.S
arch/m68k/kernel/syscalls/syscall.tbl
arch/m68k/lib/divsi3.S
arch/m68k/lib/modsi3.S
arch/m68k/lib/mulsi3.S
arch/m68k/lib/udivsi3.S
arch/m68k/lib/umodsi3.S
arch/microblaze/kernel/syscalls/syscall.tbl
arch/mips/configs/bigsur_defconfig
arch/mips/configs/fuloong2e_defconfig
arch/mips/configs/ip22_defconfig
arch/mips/configs/ip32_defconfig
arch/mips/configs/jazz_defconfig
arch/mips/configs/lemote2f_defconfig
arch/mips/configs/loongson2k_defconfig
arch/mips/configs/loongson3_defconfig
arch/mips/configs/mtx1_defconfig
arch/mips/configs/pic32mzda_defconfig
arch/mips/configs/rm200_defconfig
arch/mips/include/asm/local.h
arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/openrisc/include/uapi/asm/sigcontext.h
arch/openrisc/kernel/signal.c
arch/parisc/Kconfig.debug
arch/parisc/boot/compressed/misc.c
arch/parisc/configs/generic-32bit_defconfig
arch/parisc/configs/generic-64bit_defconfig
arch/parisc/include/asm/dma.h
arch/parisc/include/asm/ftrace.h
arch/parisc/include/asm/spinlock.h
arch/parisc/include/asm/spinlock_types.h
arch/parisc/kernel/entry.S
arch/parisc/kernel/firmware.c
arch/parisc/kernel/ftrace.c
arch/parisc/kernel/parisc_ksyms.c
arch/parisc/kernel/pci-dma.c
arch/parisc/kernel/pdt.c
arch/parisc/kernel/perf.c
arch/parisc/kernel/processor.c
arch/parisc/kernel/setup.c
arch/parisc/kernel/signal.c
arch/parisc/kernel/syscall.S
arch/parisc/kernel/syscalls/syscall.tbl
arch/parisc/kernel/unaligned.c
arch/parisc/lib/ucmpdi2.c
arch/parisc/mm/fault.c
arch/parisc/mm/fixmap.c
arch/parisc/mm/init.c
arch/parisc/mm/ioremap.c
arch/powerpc/configs/44x/sam440ep_defconfig
arch/powerpc/configs/85xx/stx_gp3_defconfig
arch/powerpc/configs/cell_defconfig
arch/powerpc/configs/ep8248e_defconfig
arch/powerpc/configs/mgcoge_defconfig
arch/powerpc/configs/pasemi_defconfig
arch/powerpc/configs/pmac32_defconfig
arch/powerpc/configs/powernv_defconfig
arch/powerpc/configs/ppc64_defconfig
arch/powerpc/configs/ppc64e_defconfig
arch/powerpc/configs/ppc6xx_defconfig
arch/powerpc/configs/ps3_defconfig
arch/powerpc/crypto/.gitignore [moved from tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore with 58% similarity]
arch/powerpc/include/asm/book3s/64/hash-4k.h
arch/powerpc/include/asm/book3s/64/hash-64k.h
arch/powerpc/include/asm/book3s/64/hash.h
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/elf.h
arch/powerpc/include/asm/processor.h
arch/powerpc/include/asm/thread_info.h
arch/powerpc/include/asm/word-at-a-time.h
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/head_64.S
arch/powerpc/kernel/rtas_flash.c
arch/powerpc/kernel/security.c
arch/powerpc/kernel/syscalls/syscall.tbl
arch/powerpc/kernel/trace/ftrace_mprofile.S
arch/powerpc/kernel/traps.c
arch/powerpc/mm/book3s64/hash_native.c
arch/powerpc/mm/book3s64/subpage_prot.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/kasan/Makefile
arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
arch/powerpc/platforms/85xx/smp.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/powermac/time.c
arch/powerpc/platforms/pseries/vas.c
arch/riscv/Kconfig
arch/riscv/configs/defconfig
arch/riscv/configs/rv32_defconfig
arch/riscv/include/asm/acpi.h
arch/riscv/include/asm/cacheflush.h
arch/riscv/include/asm/efi.h
arch/riscv/include/asm/insn.h
arch/riscv/include/asm/mmio.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/vector.h
arch/riscv/include/asm/vmalloc.h
arch/riscv/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
arch/riscv/include/uapi/asm/ptrace.h
arch/riscv/kernel/acpi.c
arch/riscv/kernel/compat_vdso/Makefile
arch/riscv/kernel/cpu.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/crash_core.c
arch/riscv/kernel/elf_kexec.c
arch/riscv/kernel/irq.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/smp.c
arch/riscv/kernel/traps.c
arch/riscv/lib/uaccess.S
arch/riscv/mm/init.c
arch/riscv/mm/kasan_init.c
arch/riscv/mm/pageattr.c
arch/riscv/net/bpf_jit.h
arch/riscv/net/bpf_jit_core.c
arch/s390/Kbuild
arch/s390/Kconfig
arch/s390/Makefile
arch/s390/boot/startup.c
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/crypto/paes_s390.c
arch/s390/hypfs/Makefile
arch/s390/hypfs/hypfs.h
arch/s390/hypfs/hypfs_dbfs.c
arch/s390/hypfs/hypfs_diag.c
arch/s390/hypfs/hypfs_diag.h [new file with mode: 0644]
arch/s390/hypfs/hypfs_diag_fs.c [new file with mode: 0644]
arch/s390/hypfs/hypfs_vm.c
arch/s390/hypfs/hypfs_vm.h [new file with mode: 0644]
arch/s390/hypfs/hypfs_vm_fs.c [new file with mode: 0644]
arch/s390/hypfs/inode.c
arch/s390/include/asm/Kbuild
arch/s390/include/asm/debug.h
arch/s390/include/asm/diag.h
arch/s390/include/asm/ftrace.h
arch/s390/include/asm/kfence.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/maccess.h
arch/s390/include/asm/page.h
arch/s390/include/asm/pfault.h [new file with mode: 0644]
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/sclp.h
arch/s390/include/asm/setup.h
arch/s390/include/asm/uv.h
arch/s390/include/uapi/asm/pkey.h
arch/s390/include/uapi/asm/ptrace.h
arch/s390/kernel/Makefile
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/cert_store.c [new file with mode: 0644]
arch/s390/kernel/diag.c
arch/s390/kernel/ebcdic.c
arch/s390/kernel/entry.S
arch/s390/kernel/ipl.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/machine_kexec_file.c
arch/s390/kernel/mcount.S
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/kernel/sthyi.c
arch/s390/kernel/syscalls/syscall.tbl
arch/s390/kernel/uv.c
arch/s390/kvm/intercept.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/pv.c
arch/s390/lib/mem.S
arch/s390/lib/tishift.S
arch/s390/mm/Makefile
arch/s390/mm/cmm.c
arch/s390/mm/dump_pagetables.c
arch/s390/mm/extmem.c
arch/s390/mm/fault.c
arch/s390/mm/gmap.c
arch/s390/mm/maccess.c
arch/s390/mm/pfault.c [new file with mode: 0644]
arch/s390/mm/vmem.c
arch/s390/pci/pci_clp.c
arch/sh/boards/mach-dreamcast/irq.c
arch/sh/boards/mach-highlander/setup.c
arch/sh/boards/mach-r2d/irq.c
arch/sh/cchips/Kconfig
arch/sh/configs/espt_defconfig
arch/sh/configs/sdk7780_defconfig
arch/sh/configs/sdk7786_defconfig
arch/sh/configs/sh03_defconfig
arch/sh/configs/sh7763rdp_defconfig
arch/sh/include/asm/hd64461.h
arch/sh/kernel/syscalls/syscall.tbl
arch/sparc/configs/sparc32_defconfig
arch/sparc/include/asm/cmpxchg_32.h
arch/sparc/include/asm/cmpxchg_64.h
arch/sparc/include/asm/processor_64.h
arch/sparc/kernel/syscalls/syscall.tbl
arch/um/configs/i386_defconfig
arch/um/configs/x86_64_defconfig
arch/um/drivers/mconsole_kern.c
arch/um/drivers/vector_user.c
arch/um/include/shared/user.h
arch/um/kernel/um_arch.c
arch/um/os-Linux/sigio.c
arch/um/os-Linux/umid.c
arch/x86/Kconfig
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/efi_mixed.S
arch/x86/boot/compressed/error.c
arch/x86/boot/compressed/error.h
arch/x86/boot/compressed/head_32.S
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/idt_64.c
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/misc.h
arch/x86/boot/compressed/pgtable.h
arch/x86/boot/compressed/pgtable_64.c
arch/x86/boot/compressed/sev.c
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/entry/entry_32.S
arch/x86/entry/entry_64.S
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/entry/vdso/vma.c
arch/x86/events/amd/ibs.c
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/cstate.c
arch/x86/events/intel/ds.c
arch/x86/events/intel/uncore.c
arch/x86/events/intel/uncore_snbep.c
arch/x86/events/msr.c
arch/x86/events/perf_event.h
arch/x86/events/rapl.c
arch/x86/hyperv/hv_apic.c
arch/x86/hyperv/hv_init.c
arch/x86/hyperv/hv_vtl.c
arch/x86/hyperv/ivm.c
arch/x86/hyperv/mmu.c
arch/x86/hyperv/nested.c
arch/x86/include/asm/acpi.h
arch/x86/include/asm/alternative.h
arch/x86/include/asm/boot.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/div64.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/entry-common.h
arch/x86/include/asm/ibt.h
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/linkage.h
arch/x86/include/asm/local.h
arch/x86/include/asm/mem_encrypt.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/microcode_amd.h [deleted file]
arch/x86/include/asm/microcode_intel.h [deleted file]
arch/x86/include/asm/mshyperv.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/qspinlock.h
arch/x86/include/asm/qspinlock_paravirt.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/sev.h
arch/x86/include/asm/switch_to.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/uv/bios.h
arch/x86/include/asm/xen/page.h
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/amd_nb.c
arch/x86/kernel/apic/ipi.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu.h
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_epb.c
arch/x86/kernel/cpu/mce/amd.c
arch/x86/kernel/cpu/mce/core.c
arch/x86/kernel/cpu/mce/intel.c
arch/x86/kernel/cpu/mce/internal.h
arch/x86/kernel/cpu/microcode/Makefile
arch/x86/kernel/cpu/microcode/amd.c
arch/x86/kernel/cpu/microcode/core.c
arch/x86/kernel/cpu/microcode/intel.c
arch/x86/kernel/cpu/microcode/internal.h [new file with mode: 0644]
arch/x86/kernel/fpu/context.h
arch/x86/kernel/fpu/core.c
arch/x86/kernel/fpu/xstate.c
arch/x86/kernel/ftrace.c
arch/x86/kernel/head_64.S
arch/x86/kernel/hpet.c
arch/x86/kernel/kprobes/opt.c
arch/x86/kernel/kvm.c
arch/x86/kernel/module.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/process.c
arch/x86/kernel/sev.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/static_call.c
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/svm/vmenter.S
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx_ops.h
arch/x86/kvm/x86.c
arch/x86/lib/retpoline.S
arch/x86/mm/init.c
arch/x86/mm/mem_encrypt_amd.c
arch/x86/platform/efi/memmap.c
arch/x86/platform/uv/uv_nmi.c
arch/x86/purgatory/purgatory.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/mmu_pv.c
arch/x86/xen/setup.c
arch/x86/xen/xen-head.S
arch/xtensa/kernel/align.S
arch/xtensa/kernel/syscalls/syscall.tbl
arch/xtensa/kernel/traps.c
arch/xtensa/platforms/iss/network.c
block/bdev.c
block/blk-cgroup.c
block/blk-core.c
block/blk-crypto-fallback.c
block/blk-crypto-profile.c
block/blk-flush.c
block/blk-iocost.c
block/blk-mq.c
block/blk-zoned.c
block/disk-events.c
block/elevator.c
block/fops.c
block/genhd.c
block/ioctl.c
block/mq-deadline.c
block/partitions/amiga.c
block/partitions/core.c
crypto/af_alg.c
crypto/algif_hash.c
crypto/asymmetric_keys/public_key.c
drivers/Makefile
drivers/accel/habanalabs/common/habanalabs.h
drivers/accel/ivpu/ivpu_drv.h
drivers/accel/ivpu/ivpu_gem.c
drivers/accel/ivpu/ivpu_hw_mtl.c
drivers/accel/qaic/qaic_control.c
drivers/accel/qaic/qaic_data.c
drivers/acpi/Kconfig
drivers/acpi/Makefile
drivers/acpi/ac.c
drivers/acpi/acpi_cmos_rtc.c
drivers/acpi/acpi_extlog.c
drivers/acpi/acpi_processor.c
drivers/acpi/acpi_tad.c
drivers/acpi/acpi_video.c
drivers/acpi/acpica/acdebug.h
drivers/acpi/acpica/acglobal.h
drivers/acpi/acpica/aclocal.h
drivers/acpi/acpica/acpredef.h
drivers/acpi/acpica/dbcmds.c
drivers/acpi/acpica/dbinput.c
drivers/acpi/acpica/dswstate.c
drivers/acpi/acpica/exserial.c
drivers/acpi/acpica/psopcode.c
drivers/acpi/acpica/utdebug.c
drivers/acpi/arm64/Makefile
drivers/acpi/arm64/amba.c [moved from drivers/acpi/acpi_amba.c with 99% similarity]
drivers/acpi/arm64/init.c
drivers/acpi/arm64/init.h
drivers/acpi/arm64/iort.c
drivers/acpi/battery.c
drivers/acpi/bus.c
drivers/acpi/hed.c
drivers/acpi/internal.h
drivers/acpi/nfit/core.c
drivers/acpi/prmt.c
drivers/acpi/processor_core.c
drivers/acpi/processor_pdc.c
drivers/acpi/resource.c
drivers/acpi/scan.c
drivers/acpi/thermal.c
drivers/acpi/video_detect.c
drivers/acpi/x86/s2idle.c
drivers/acpi/x86/utils.c
drivers/android/binder.c
drivers/android/binder_alloc.c
drivers/android/binder_alloc.h
drivers/android/binderfs.c
drivers/ata/libata-core.c
drivers/ata/libata-scsi.c
drivers/ata/pata_arasan_cf.c
drivers/ata/pata_ns87415.c
drivers/ata/pata_octeon_cf.c
drivers/ata/pata_parport/aten.c
drivers/ata/pata_parport/bpck.c
drivers/ata/pata_parport/bpck6.c
drivers/ata/pata_parport/comm.c
drivers/ata/pata_parport/dstr.c
drivers/ata/pata_parport/epat.c
drivers/ata/pata_parport/epia.c
drivers/ata/pata_parport/fit2.c
drivers/ata/pata_parport/fit3.c
drivers/ata/pata_parport/friq.c
drivers/ata/pata_parport/frpw.c
drivers/ata/pata_parport/kbic.c
drivers/ata/pata_parport/ktti.c
drivers/ata/pata_parport/on20.c
drivers/ata/pata_parport/on26.c
drivers/base/cpu.c
drivers/base/power/power.h
drivers/base/power/wakeirq.c
drivers/base/regmap/regcache-rbtree.c
drivers/base/regmap/regcache.c
drivers/base/regmap/regmap-i2c.c
drivers/base/regmap/regmap-irq.c
drivers/base/regmap/regmap-kunit.c
drivers/base/regmap/regmap-spi-avmm.c
drivers/base/regmap/regmap.c
drivers/block/amiflop.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/nbd.c
drivers/block/null_blk/zoned.c
drivers/block/rbd.c
drivers/block/rnbd/rnbd-clt-sysfs.c
drivers/block/ublk_drv.c
drivers/block/virtio_blk.c
drivers/block/zram/zram_drv.c
drivers/bluetooth/btusb.c
drivers/bus/ti-sysc.c
drivers/char/tpm/st33zp24/i2c.c
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm_crb.c
drivers/char/tpm/tpm_i2c_atmel.c
drivers/char/tpm/tpm_i2c_infineon.c
drivers/char/tpm/tpm_i2c_nuvoton.c
drivers/char/tpm/tpm_tis.c
drivers/char/tpm/tpm_tis_core.c
drivers/char/tpm/tpm_tis_core.h
drivers/char/tpm/tpm_tis_i2c.c
drivers/char/tpm/tpm_tis_i2c_cr50.c
drivers/char/tpm/tpm_tis_spi_main.c
drivers/char/tpm/tpm_vtpm_proxy.c
drivers/clk/Kconfig
drivers/clk/clk-devres.c
drivers/clk/imx/clk-imx93.c
drivers/clk/keystone/syscon-clk.c
drivers/clk/mediatek/clk-mt8183.c
drivers/clk/meson/clk-pll.c
drivers/counter/Kconfig
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/amd-pstate-ut.c
drivers/cpufreq/amd-pstate.c
drivers/cpufreq/armada-37xx-cpufreq.c
drivers/cpufreq/brcmstb-avs-cpufreq.c
drivers/cpufreq/cppc_cpufreq.c
drivers/cpufreq/cpufreq-dt-platdev.c
drivers/cpufreq/cpufreq-dt.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_stats.c
drivers/cpufreq/davinci-cpufreq.c
drivers/cpufreq/imx-cpufreq-dt.c
drivers/cpufreq/imx6q-cpufreq.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/kirkwood-cpufreq.c
drivers/cpufreq/mediatek-cpufreq-hw.c
drivers/cpufreq/mediatek-cpufreq.c
drivers/cpufreq/omap-cpufreq.c
drivers/cpufreq/pcc-cpufreq.c
drivers/cpufreq/powernow-k8.c
drivers/cpufreq/ppc_cbe_cpufreq.c
drivers/cpufreq/ppc_cbe_cpufreq_pmi.c
drivers/cpufreq/qcom-cpufreq-hw.c
drivers/cpufreq/qcom-cpufreq-nvmem.c
drivers/cpufreq/qoriq-cpufreq.c
drivers/cpufreq/raspberrypi-cpufreq.c
drivers/cpufreq/scpi-cpufreq.c
drivers/cpufreq/sparc-us2e-cpufreq.c
drivers/cpufreq/sparc-us3-cpufreq.c
drivers/cpufreq/sti-cpufreq.c
drivers/cpufreq/sun50i-cpufreq-nvmem.c
drivers/cpufreq/tegra186-cpufreq.c
drivers/cpufreq/tegra194-cpufreq.c
drivers/cpufreq/ti-cpufreq.c
drivers/cpufreq/vexpress-spc-cpufreq.c
drivers/cpuidle/cpuidle-psci-domain.c
drivers/cpuidle/dt_idle_genpd.c
drivers/cpuidle/dt_idle_genpd.h
drivers/cpuidle/governors/gov.h [new file with mode: 0644]
drivers/cpuidle/governors/menu.c
drivers/cpuidle/governors/teo.c
drivers/crypto/Kconfig
drivers/crypto/caam/ctrl.c
drivers/cxl/Kconfig
drivers/cxl/acpi.c
drivers/cxl/core/mbox.c
drivers/cxl/core/memdev.c
drivers/cxl/cxlmem.h
drivers/devfreq/devfreq.c
drivers/devfreq/imx-bus.c
drivers/devfreq/imx8m-ddrc.c
drivers/devfreq/mtk-cci-devfreq.c
drivers/devfreq/tegra30-devfreq.c
drivers/dma-buf/dma-fence-unwrap.c
drivers/dma-buf/dma-fence.c
drivers/dma-buf/dma-resv.c
drivers/dma-buf/sw_sync.c
drivers/dma/Kconfig
drivers/dma/idxd/device.c
drivers/dma/mcf-edma.c
drivers/dma/owl-dma.c
drivers/dma/pl330.c
drivers/dma/xilinx/xdma.c
drivers/edac/amd64_edac.c
drivers/edac/i10nm_base.c
drivers/eisa/eisa-bus.c
drivers/firmware/arm_scmi/mailbox.c
drivers/firmware/arm_scmi/raw_mode.c
drivers/firmware/arm_scmi/smc.c
drivers/firmware/arm_sdei.c
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/arm64-stub.c
drivers/firmware/efi/libstub/efi-stub-helper.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/randomalloc.c
drivers/firmware/efi/libstub/x86-5lvl.c [new file with mode: 0644]
drivers/firmware/efi/libstub/x86-stub.c
drivers/firmware/efi/libstub/x86-stub.h [new file with mode: 0644]
drivers/firmware/efi/libstub/zboot.c
drivers/firmware/efi/riscv-runtime.c
drivers/firmware/efi/runtime-wrappers.c
drivers/firmware/smccc/soc_id.c
drivers/gpio/gpio-mvebu.c
drivers/gpio/gpio-sim.c
drivers/gpio/gpio-tps68470.c
drivers/gpio/gpio-ws16c48.c
drivers/gpio/gpiolib-sysfs.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_debug.c
drivers/gpu/drm/amd/amdkfd/kfd_debug.h
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
drivers/gpu/drm/amd/display/dc/dcn301/Makefile
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c [new file with mode: 0644]
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h [new file with mode: 0644]
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
drivers/gpu/drm/armada/armada_fbdev.c
drivers/gpu/drm/bridge/ite-it6505.c
drivers/gpu/drm/bridge/lontium-lt9611.c
drivers/gpu/drm/bridge/samsung-dsim.c
drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
drivers/gpu/drm/bridge/ti-sn65dsi86.c
drivers/gpu/drm/drm_atomic.c
drivers/gpu/drm/drm_client.c
drivers/gpu/drm/drm_client_modeset.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_fbdev_dma.c
drivers/gpu/drm/drm_fbdev_generic.c
drivers/gpu/drm/drm_gem_shmem_helper.c
drivers/gpu/drm/drm_probe_helper.c
drivers/gpu/drm/drm_syncobj.c
drivers/gpu/drm/exynos/exynos_drm_fbdev.c
drivers/gpu/drm/gma500/fbdev.c
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_display_device.c
drivers/gpu/drm/i915/display/intel_dpt.c
drivers/gpu/drm/i915/display/intel_fbdev.c
drivers/gpu/drm/i915/display/intel_hotplug.c
drivers/gpu/drm/i915/display/intel_sdvo.c
drivers/gpu/drm/i915/gem/selftests/huge_pages.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.c
drivers/gpu/drm/i915/gt/gen8_engine_cs.h
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/intel_gpu_commands.h
drivers/gpu/drm/i915/gt/intel_gt_regs.h
drivers/gpu/drm/i915/gt/intel_gtt.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
drivers/gpu/drm/i915/gt/uc/intel_huc.c
drivers/gpu/drm/i915/gvt/edid.c
drivers/gpu/drm/i915/i915_active.c
drivers/gpu/drm/i915/i915_driver.c
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
drivers/gpu/drm/msm/adreno/adreno_device.c
drivers/gpu/drm/msm/adreno/adreno_gpu.h
drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
drivers/gpu/drm/msm/msm_fbdev.c
drivers/gpu/drm/msm/msm_fence.c
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/msm/msm_mdss.c
drivers/gpu/drm/nouveau/dispnv50/disp.c
drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
drivers/gpu/drm/nouveau/nouveau_chan.c
drivers/gpu/drm/nouveau/nouveau_chan.h
drivers/gpu/drm/nouveau/nouveau_connector.c
drivers/gpu/drm/nouveau/nouveau_drm.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
drivers/gpu/drm/omapdrm/omap_fbdev.c
drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
drivers/gpu/drm/panel/panel-simple.c
drivers/gpu/drm/panfrost/panfrost_devfreq.c
drivers/gpu/drm/qxl/qxl_drv.h
drivers/gpu/drm/qxl/qxl_dumb.c
drivers/gpu/drm/qxl/qxl_gem.c
drivers/gpu/drm/qxl/qxl_ioctl.c
drivers/gpu/drm/radeon/radeon_fbdev.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.c
drivers/gpu/drm/scheduler/sched_entity.c
drivers/gpu/drm/scheduler/sched_fence.c
drivers/gpu/drm/scheduler/sched_main.c
drivers/gpu/drm/tegra/fbdev.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_resource.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
drivers/hid/amd-sfh-hid/sfh1_1/amd_sfh_desc.c
drivers/hid/hid-hyperv.c
drivers/hid/hid-input.c
drivers/hid/hid-logitech-hidpp.c
drivers/hid/hid-nvidia-shield.c
drivers/hv/connection.c
drivers/hv/hv_balloon.c
drivers/hv/hv_common.c
drivers/hwmon/aquacomputer_d5next.c
drivers/hwmon/k10temp.c
drivers/hwmon/nct6775-core.c
drivers/hwmon/nct6775-platform.c
drivers/hwmon/nct6775.h
drivers/hwmon/nct7802.c
drivers/hwmon/oxp-sensors.c
drivers/hwmon/pmbus/bel-pfe.c
drivers/hwmon/pmbus/pmbus_core.c
drivers/i2c/busses/i2c-bcm-iproc.c
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-hisi.c
drivers/i2c/busses/i2c-imx-lpi2c.c
drivers/i2c/busses/i2c-sun6i-p2wi.c
drivers/i2c/busses/i2c-tegra.c
drivers/idle/intel_idle.c
drivers/iio/adc/ad7192.c
drivers/iio/adc/ina2xx-adc.c
drivers/iio/adc/meson_saradc.c
drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
drivers/iio/frequency/admv1013.c
drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
drivers/iio/industrialio-core.c
drivers/iio/light/rohm-bu27008.c
drivers/iio/light/rohm-bu27034.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/umem.c
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/bnxt_re/qplib_fp.c
drivers/infiniband/hw/bnxt_re/qplib_fp.h
drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
drivers/infiniband/hw/bnxt_re/qplib_res.c
drivers/infiniband/hw/hfi1/chip.c
drivers/infiniband/hw/irdma/ctrl.c
drivers/infiniband/hw/irdma/defs.h
drivers/infiniband/hw/irdma/hw.c
drivers/infiniband/hw/irdma/main.h
drivers/infiniband/hw/irdma/puda.c
drivers/infiniband/hw/irdma/type.h
drivers/infiniband/hw/irdma/uk.c
drivers/infiniband/hw/irdma/utils.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/qib/qib_fs.c
drivers/infiniband/sw/rxe/rxe_mw.c
drivers/interconnect/qcom/bcm-voter.c
drivers/interconnect/qcom/icc-rpmh.h
drivers/interconnect/qcom/sa8775p.c
drivers/interconnect/qcom/sm8450.c
drivers/interconnect/qcom/sm8550.c
drivers/iommu/iommu-sva.c
drivers/iommu/iommu.c
drivers/iommu/iommufd/device.c
drivers/iommu/iommufd/iommufd_private.h
drivers/iommu/iommufd/main.c
drivers/iommu/iommufd/pages.c
drivers/irqchip/irq-bcm6345-l1.c
drivers/irqchip/irq-bcm7038-l1.c
drivers/irqchip/irq-brcmstb-l2.c
drivers/irqchip/irq-gic-pm.c
drivers/irqchip/irq-gic-v3-its-fsl-mc-msi.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-i8259.c
drivers/irqchip/irq-imx-intmux.c
drivers/irqchip/irq-imx-irqsteer.c
drivers/irqchip/irq-imx-mu-msi.c
drivers/irqchip/irq-keystone.c
drivers/irqchip/irq-loongson-eiointc.c
drivers/irqchip/irq-loongson-htvec.c
drivers/irqchip/irq-loongson-pch-pic.c
drivers/irqchip/irq-ls-scfg-msi.c
drivers/irqchip/irq-madera.c
drivers/irqchip/irq-meson-gpio.c
drivers/irqchip/irq-mips-gic.c
drivers/irqchip/irq-mvebu-sei.c
drivers/irqchip/irq-orion.c
drivers/irqchip/irq-pruss-intc.c
drivers/irqchip/irq-qcom-mpm.c
drivers/irqchip/irq-renesas-intc-irqpin.c
drivers/irqchip/irq-st.c
drivers/irqchip/irq-stm32-exti.c
drivers/irqchip/irq-sunxi-nmi.c
drivers/irqchip/irq-tb10x.c
drivers/irqchip/irq-ti-sci-inta.c
drivers/irqchip/irq-ti-sci-intr.c
drivers/irqchip/irq-uniphier-aidet.c
drivers/irqchip/irq-xtensa-pic.c
drivers/irqchip/irqchip.c
drivers/irqchip/qcom-pdc.c
drivers/isdn/hardware/mISDN/hfcpci.c
drivers/isdn/mISDN/dsp.h
drivers/isdn/mISDN/dsp_cmx.c
drivers/isdn/mISDN/dsp_core.c
drivers/leds/trigger/ledtrig-netdev.c
drivers/md/dm-cache-policy-smq.c
drivers/md/dm-integrity.c
drivers/md/dm-raid.c
drivers/md/md.c
drivers/media/cec/usb/pulse8/pulse8-cec.c
drivers/media/i2c/tc358746.c
drivers/media/pci/cx23885/cx23885-dvb.c
drivers/media/platform/amphion/vpu_core.c
drivers/media/platform/amphion/vpu_mbox.c
drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
drivers/media/platform/mediatek/jpeg/mtk_jpeg_dec_hw.c
drivers/media/platform/mediatek/jpeg/mtk_jpeg_enc_hw.c
drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c
drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h
drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c
drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h
drivers/media/platform/nxp/imx7-media-csi.c
drivers/media/platform/qcom/venus/hfi_cmds.c
drivers/media/platform/verisilicon/hantro.h
drivers/media/platform/verisilicon/hantro_postproc.c
drivers/media/usb/uvc/uvc_v4l2.c
drivers/memory/tegra/mc.c
drivers/memory/tegra/tegra194.c
drivers/memory/tegra/tegra234.c
drivers/misc/cardreader/rts5227.c
drivers/misc/cardreader/rts5228.c
drivers/misc/cardreader/rts5249.c
drivers/misc/cardreader/rts5260.c
drivers/misc/cardreader/rts5261.c
drivers/misc/cardreader/rtsx_pcr.c
drivers/misc/ibmasm/ibmasmfs.c
drivers/misc/ibmvmc.c
drivers/misc/lkdtm/bugs.c
drivers/misc/sram.c
drivers/misc/tps6594-esm.c
drivers/mmc/core/block.c
drivers/mmc/host/moxart-mmc.c
drivers/mmc/host/sdhci_f_sdh30.c
drivers/mmc/host/sunplus-mmc.c
drivers/mmc/host/wbsd.c
drivers/mtd/nand/raw/fsl_upm.c
drivers/mtd/nand/raw/meson_nand.c
drivers/mtd/nand/raw/omap_elm.c
drivers/mtd/nand/raw/rockchip-nand-controller.c
drivers/mtd/nand/spi/toshiba.c
drivers/mtd/nand/spi/winbond.c
drivers/mtd/spi-nor/spansion.c
drivers/net/bonding/bond_alb.c
drivers/net/bonding/bond_main.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/spi/mcp251xfd/mcp251xfd.h
drivers/net/can/usb/gs_usb.c
drivers/net/can/vxcan.c
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/microchip/ksz8795.c
drivers/net/dsa/microchip/ksz_common.c
drivers/net/dsa/microchip/ksz_common.h
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/ocelot/felix.c
drivers/net/dsa/ocelot/felix.h
drivers/net/dsa/ocelot/felix_vsc9959.c
drivers/net/dsa/qca/ar9331.c
drivers/net/dsa/qca/qca8k-8xxx.c
drivers/net/dsa/qca/qca8k-common.c
drivers/net/ethernet/amazon/ena/ena_com.c
drivers/net/ethernet/atheros/atl1c/atl1c_main.c
drivers/net/ethernet/atheros/atl1e/atl1e_main.c
drivers/net/ethernet/atheros/atlx/atl1.c
drivers/net/ethernet/broadcom/b44.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
drivers/net/ethernet/broadcom/genet/bcmmii.c
drivers/net/ethernet/broadcom/tg3.c
drivers/net/ethernet/brocade/bna/bnad_debugfs.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/fec.h
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/google/gve/gve.h
drivers/net/ethernet/google/gve/gve_ethtool.c
drivers/net/ethernet/google/gve/gve_main.c
drivers/net/ethernet/hisilicon/hns3/hnae3.h
drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/i40e/i40e_debugfs.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_nvm.c
drivers/net/ethernet/intel/iavf/iavf.h
drivers/net/ethernet/intel/iavf/iavf_ethtool.c
drivers/net/ethernet/intel/iavf/iavf_fdir.c
drivers/net/ethernet/intel/iavf/iavf_fdir.h
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
drivers/net/ethernet/intel/ice/ice_base.c
drivers/net/ethernet/intel/ice/ice_eswitch.c
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
drivers/net/ethernet/intel/ice/ice_lib.c
drivers/net/ethernet/intel/ice/ice_main.c
drivers/net/ethernet/intel/ice/ice_sriov.c
drivers/net/ethernet/intel/ice/ice_tc_lib.c
drivers/net/ethernet/intel/ice/ice_tc_lib.h
drivers/net/ethernet/intel/ice/ice_vf_lib.c
drivers/net/ethernet/intel/ice/ice_vf_lib.h
drivers/net/ethernet/intel/ice/ice_virtchnl.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_defines.h
drivers/net/ethernet/intel/igc/igc_ethtool.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/igc/igc_ptp.c
drivers/net/ethernet/intel/igc/igc_tsn.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/korina.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
drivers/net/ethernet/marvell/octeon_ep/octep_main.c
drivers/net/ethernet/marvell/octeontx2/af/ptp.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h
drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
drivers/net/ethernet/marvell/prestera/prestera_pci.c
drivers/net/ethernet/marvell/prestera/prestera_router.c
drivers/net/ethernet/mediatek/mtk_eth_soc.c
drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
drivers/net/ethernet/mediatek/mtk_wed.c
drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c
drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
drivers/net/ethernet/mellanox/mlx5/core/sriov.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
drivers/net/ethernet/mellanox/mlx5/core/thermal.c
drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
drivers/net/ethernet/mellanox/mlxsw/pci.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
drivers/net/ethernet/microchip/Kconfig
drivers/net/ethernet/microsoft/mana/mana_en.c
drivers/net/ethernet/mscc/ocelot.c
drivers/net/ethernet/mscc/ocelot_fdma.c
drivers/net/ethernet/mscc/ocelot_mm.c
drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
drivers/net/ethernet/pensando/ionic/ionic_lif.c
drivers/net/ethernet/qlogic/qed/qed_dev_api.h
drivers/net/ethernet/qlogic/qed/qed_fcoe.c
drivers/net/ethernet/qlogic/qed/qed_fcoe.h
drivers/net/ethernet/qlogic/qed/qed_hw.c
drivers/net/ethernet/qlogic/qed/qed_iscsi.c
drivers/net/ethernet/qlogic/qed/qed_iscsi.h
drivers/net/ethernet/qlogic/qed/qed_l2.c
drivers/net/ethernet/qlogic/qed/qed_l2.h
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/qlogic/qede/qede_main.c
drivers/net/ethernet/qualcomm/emac/emac-mac.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/sfc/ef100_nic.c
drivers/net/ethernet/sfc/falcon/selftest.c
drivers/net/ethernet/sfc/selftest.c
drivers/net/ethernet/sfc/siena/selftest.c
drivers/net/ethernet/sfc/tc.c
drivers/net/ethernet/socionext/netsec.c
drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
drivers/net/ethernet/ti/cpsw_ale.c
drivers/net/ethernet/wangxun/libwx/wx_hw.c
drivers/net/ethernet/wangxun/txgbe/txgbe_hw.c
drivers/net/ethernet/xilinx/ll_temac_main.c
drivers/net/ipa/ipa_table.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/macsec.c
drivers/net/macvlan.c
drivers/net/mdio/mdio-bitbang.c
drivers/net/netdevsim/dev.c
drivers/net/pcs/pcs-rzn1-miic.c
drivers/net/phy/at803x.c
drivers/net/phy/broadcom.c
drivers/net/phy/marvell10g.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/sfp-bus.c
drivers/net/tap.c
drivers/net/team/team.c
drivers/net/tun.c
drivers/net/usb/cdc_ether.c
drivers/net/usb/lan78xx.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/usbnet.c
drivers/net/usb/zaurus.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vrf.c
drivers/net/vxlan/vxlan_core.c
drivers/net/vxlan/vxlan_vnifilter.c
drivers/net/wireguard/allowedips.c
drivers/net/wireguard/selftest/allowedips.c
drivers/net/wireless/ath/ath11k/ahb.c
drivers/net/wireless/ath/ath11k/pcic.c
drivers/net/wireless/ath/ath12k/wmi.c
drivers/net/wireless/ath/ath6kl/Makefile
drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
drivers/net/wireless/cisco/airo.c
drivers/net/wireless/intel/iwlwifi/Kconfig
drivers/net/wireless/intel/iwlwifi/cfg/22000.c
drivers/net/wireless/intel/iwlwifi/iwl-config.h
drivers/net/wireless/intel/iwlwifi/iwl-fh.h
drivers/net/wireless/intel/iwlwifi/iwl-trans.c
drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
drivers/net/wireless/intel/iwlwifi/pcie/trans.c
drivers/net/wireless/intel/iwlwifi/pcie/tx.c
drivers/net/wireless/intel/iwlwifi/queue/tx.c
drivers/net/wireless/intel/iwlwifi/queue/tx.h
drivers/net/wireless/legacy/rayctl.h
drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
drivers/net/wireless/mediatek/mt76/mt7921/dma.c
drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
drivers/net/wireless/mediatek/mt76/mt7921/pci.c
drivers/net/wireless/realtek/rtw89/debug.c
drivers/net/wireless/realtek/rtw89/mac.c
drivers/net/xen-netback/netback.c
drivers/nvme/host/core.c
drivers/nvme/host/fault_inject.c
drivers/nvme/host/fc.c
drivers/nvme/host/ioctl.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/sysfs.c
drivers/nvme/host/tcp.c
drivers/nvme/host/zns.c
drivers/nvme/target/loop.c
drivers/nvme/target/passthru.c
drivers/of/Kconfig
drivers/of/dynamic.c
drivers/of/kexec.c
drivers/of/platform.c
drivers/of/unittest.c
drivers/opp/core.c
drivers/opp/cpu.c
drivers/parisc/sba_iommu.c
drivers/parport/parport_gsc.c
drivers/parport/parport_gsc.h
drivers/pci/bus.c
drivers/pci/controller/Kconfig
drivers/pci/controller/dwc/pcie-designware-host.c
drivers/pci/controller/dwc/pcie-designware.c
drivers/pci/controller/dwc/pcie-designware.h
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/of.c
drivers/perf/Kconfig
drivers/perf/alibaba_uncore_drw_pmu.c
drivers/perf/amlogic/meson_ddr_pmu_core.c
drivers/perf/arm-cci.c
drivers/perf/arm-cmn.c
drivers/perf/arm_dmc620_pmu.c
drivers/perf/arm_dsu_pmu.c
drivers/perf/arm_pmu.c
drivers/perf/arm_pmu_acpi.c
drivers/perf/arm_pmu_platform.c
drivers/perf/arm_pmuv3.c
drivers/perf/arm_smmuv3_pmu.c
drivers/perf/arm_spe_pmu.c
drivers/perf/fsl_imx8_ddr_perf.c
drivers/perf/fsl_imx9_ddr_perf.c
drivers/perf/hisilicon/hisi_pcie_pmu.c
drivers/perf/marvell_cn10k_ddr_pmu.c
drivers/perf/marvell_cn10k_tad_pmu.c
drivers/perf/riscv_pmu.c
drivers/perf/xgene_pmu.c
drivers/phy/hisilicon/phy-hisi-inno-usb2.c
drivers/phy/mediatek/phy-mtk-dp.c
drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
drivers/pinctrl/pinctrl-amd.c
drivers/pinctrl/pinctrl-amd.h
drivers/pinctrl/qcom/pinctrl-msm.c
drivers/pinctrl/qcom/pinctrl-msm.h
drivers/pinctrl/qcom/pinctrl-sa8775p.c
drivers/pinctrl/renesas/pinctrl-rza2.c
drivers/pinctrl/renesas/pinctrl-rzg2l.c
drivers/pinctrl/renesas/pinctrl-rzv2m.c
drivers/platform/chrome/chromeos_acpi.c
drivers/platform/chrome/cros_ec_lpc.c
drivers/platform/mellanox/mlxbf-tmfifo.c
drivers/platform/x86/amd/Makefile
drivers/platform/x86/amd/pmc-quirks.c [new file with mode: 0644]
drivers/platform/x86/amd/pmc.c
drivers/platform/x86/amd/pmc.h [new file with mode: 0644]
drivers/platform/x86/amd/pmf/acpi.c
drivers/platform/x86/amd/pmf/core.c
drivers/platform/x86/amd/pmf/pmf.h
drivers/platform/x86/amd/pmf/sps.c
drivers/platform/x86/asus-wmi.c
drivers/platform/x86/dell/dell-wmi-ddv.c
drivers/platform/x86/huawei-wmi.c
drivers/platform/x86/ideapad-laptop.c
drivers/platform/x86/intel/hid.c
drivers/platform/x86/intel/ifs/load.c
drivers/platform/x86/intel/int3472/clk_and_regulator.c
drivers/platform/x86/intel/pmc/core.c
drivers/platform/x86/intel/speed_select_if/isst_if_common.c
drivers/platform/x86/intel/tpmi.c
drivers/platform/x86/lenovo-ymc.c
drivers/platform/x86/mlx-platform.c
drivers/platform/x86/msi-ec.c
drivers/platform/x86/msi-laptop.c
drivers/platform/x86/serial-multi-instantiate.c
drivers/platform/x86/think-lmi.c
drivers/platform/x86/thinkpad_acpi.c
drivers/platform/x86/touchscreen_dmi.c
drivers/platform/x86/wmi.c
drivers/pnp/pnpacpi/core.c
drivers/powercap/arm_scmi_powercap.c
drivers/powercap/intel_rapl_common.c
drivers/powercap/intel_rapl_msr.c
drivers/powercap/intel_rapl_tpmi.c
drivers/regulator/da9063-regulator.c
drivers/regulator/mt6358-regulator.c
drivers/regulator/qcom-rpmh-regulator.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_3990_erp.c
drivers/s390/block/dasd_ioctl.c
drivers/s390/block/dcssblk.c
drivers/s390/block/scm_blk.c
drivers/s390/char/sclp_cmd.c
drivers/s390/char/sclp_early.c
drivers/s390/char/vmcp.c
drivers/s390/char/zcore.c
drivers/s390/crypto/Makefile
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/ap_queue.c
drivers/s390/crypto/pkey_api.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/s390/crypto/vfio_ap_private.h
drivers/s390/crypto/zcrypt_cex2a.c
drivers/s390/crypto/zcrypt_cex2a.h
drivers/s390/crypto/zcrypt_cex2c.c
drivers/s390/crypto/zcrypt_cex2c.h
drivers/s390/crypto/zcrypt_ep11misc.c
drivers/s390/crypto/zcrypt_ep11misc.h
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype50.h
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/s390/net/ism_drv.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l2_main.c
drivers/s390/net/qeth_l3_main.c
drivers/s390/scsi/zfcp_fc.c
drivers/scsi/53c700.c
drivers/scsi/aacraid/aacraid.h
drivers/scsi/fnic/fnic.h
drivers/scsi/fnic/fnic_scsi.c
drivers/scsi/fnic/fnic_trace.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/pm8001/pm8001_init.c
drivers/scsi/qedf/qedf_main.c
drivers/scsi/qedi/qedi_main.c
drivers/scsi/qla2xxx/qla_def.h
drivers/scsi/qla2xxx/qla_init.c
drivers/scsi/qla2xxx/qla_iocb.c
drivers/scsi/raid_class.c
drivers/scsi/scsi_debug.c
drivers/scsi/scsi_proc.c
drivers/scsi/sd.c
drivers/scsi/sd_zbc.c
drivers/scsi/sg.c
drivers/scsi/snic/snic_disc.c
drivers/scsi/storvsc_drv.c
drivers/soc/aspeed/aspeed-socinfo.c
drivers/soc/aspeed/aspeed-uart-routing.c
drivers/soc/fsl/qe/qe.c
drivers/soc/imx/imx8mp-blk-ctrl.c
drivers/soundwire/amd_manager.c
drivers/soundwire/bus.c
drivers/soundwire/qcom.c
drivers/spi/spi-bcm63xx.c
drivers/spi/spi-cadence.c
drivers/spi/spi-qcom-qspi.c
drivers/spi/spi-s3c64xx.c
drivers/spi/spi-stm32.c
drivers/staging/fbtft/fb_ili9341.c
drivers/staging/ks7010/ks_wlan_net.c
drivers/staging/media/atomisp/Kconfig
drivers/staging/rtl8712/rtl871x_xmit.c
drivers/staging/rtl8712/xmit_linux.c
drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
drivers/thermal/intel/intel_tcc_cooling.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_core.h
drivers/thermal/thermal_of.c
drivers/thermal/thermal_trip.c
drivers/thunderbolt/tb.c
drivers/thunderbolt/tmu.c
drivers/tty/Kconfig
drivers/tty/n_gsm.c
drivers/tty/serial/8250/8250_core.c
drivers/tty/serial/8250/8250_dwlib.c
drivers/tty/serial/8250/8250_port.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/qcom_geni_serial.c
drivers/tty/serial/serial_base.h
drivers/tty/serial/serial_base_bus.c
drivers/tty/serial/sh-sci.c
drivers/tty/serial/sifive.c
drivers/tty/serial/ucc_uart.c
drivers/tty/tty_io.c
drivers/ufs/core/ufs-mcq.c
drivers/ufs/core/ufshcd.c
drivers/ufs/host/Kconfig
drivers/ufs/host/ufs-qcom.c
drivers/ufs/host/ufs-renesas.c
drivers/usb/cdns3/cdns3-gadget.c
drivers/usb/common/usb-conn-gpio.c
drivers/usb/core/devio.c
drivers/usb/core/quirks.c
drivers/usb/dwc3/core.c
drivers/usb/dwc3/core.h
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/composite.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/legacy/raw_gadget.c
drivers/usb/gadget/udc/core.c
drivers/usb/gadget/udc/tegra-xudc.c
drivers/usb/host/ohci-at91.c
drivers/usb/host/xhci-mtk.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci-tegra.c
drivers/usb/misc/ehset.c
drivers/usb/serial/option.c
drivers/usb/serial/usb-serial-simple.c
drivers/usb/storage/alauda.c
drivers/usb/typec/altmodes/displayport.c
drivers/usb/typec/class.c
drivers/usb/typec/mux/Kconfig
drivers/usb/typec/mux/nb7vpq904m.c
drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
drivers/usb/typec/tcpm/tcpm.c
drivers/usb/typec/ucsi/ucsi.c
drivers/vdpa/mlx5/core/mlx5_vdpa.h
drivers/vdpa/mlx5/core/mr.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/pds/Makefile
drivers/vdpa/pds/debugfs.c
drivers/vdpa/pds/vdpa_dev.c
drivers/vdpa/pds/vdpa_dev.h
drivers/vdpa/vdpa.c
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/vhost/scsi.c
drivers/video/console/sticon.c
drivers/video/console/vgacon.c
drivers/video/fbdev/amifb.c
drivers/video/fbdev/atmel_lcdfb.c
drivers/video/fbdev/au1200fb.c
drivers/video/fbdev/bw2.c
drivers/video/fbdev/cg14.c
drivers/video/fbdev/cg3.c
drivers/video/fbdev/cg6.c
drivers/video/fbdev/core/fbcon.c
drivers/video/fbdev/ep93xx-fb.c
drivers/video/fbdev/ffb.c
drivers/video/fbdev/goldfishfb.c
drivers/video/fbdev/grvga.c
drivers/video/fbdev/imxfb.c
drivers/video/fbdev/kyro/STG4000InitDevice.c
drivers/video/fbdev/leo.c
drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
drivers/video/fbdev/mmp/hw/mmp_ctrl.c
drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
drivers/video/fbdev/p9100.c
drivers/video/fbdev/platinumfb.c
drivers/video/fbdev/sbuslib.c
drivers/video/fbdev/ssd1307fb.c
drivers/video/fbdev/sunxvr1000.c
drivers/video/fbdev/sunxvr2500.c
drivers/video/fbdev/sunxvr500.c
drivers/video/fbdev/tcx.c
drivers/video/fbdev/xilinxfb.c
drivers/virtio/virtio_mem.c
drivers/virtio/virtio_mmio.c
drivers/virtio/virtio_pci_common.c
drivers/virtio/virtio_pci_legacy.c
drivers/virtio/virtio_vdpa.c
drivers/xen/Kconfig
drivers/xen/events/events_base.c
drivers/xen/evtchn.c
drivers/xen/grant-dma-ops.c
drivers/xen/grant-table.c
drivers/xen/privcmd.c
drivers/xen/xen-acpi-processor.c
drivers/xen/xen-pciback/conf_space_quirks.h
drivers/xen/xen-pciback/pciback.h
drivers/xen/xenbus/xenbus_probe.c
drivers/xen/xenbus/xenbus_probe_frontend.c
drivers/xen/xenbus/xenbus_xs.c
drivers/zorro/names.c
fs/9p/fid.h
fs/9p/v9fs.c
fs/9p/v9fs.h
fs/9p/vfs_dir.c
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/Kconfig
fs/adfs/inode.c
fs/affs/amigaffs.c
fs/affs/file.c
fs/affs/inode.c
fs/affs/namei.c
fs/afs/dynroot.c
fs/afs/inode.c
fs/aio.c
fs/attr.c
fs/autofs/Kconfig
fs/autofs/inode.c
fs/autofs/root.c
fs/autofs/waitq.c
fs/bad_inode.c
fs/befs/linuxvfs.c
fs/bfs/dir.c
fs/bfs/inode.c
fs/binfmt_misc.c
fs/btrfs/Kconfig
fs/btrfs/accessors.h
fs/btrfs/backref.c
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/block-rsv.c
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-inode.h
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-io-tree.c
fs/btrfs/extent-io-tree.h
fs/btrfs/extent-tree.c
fs/btrfs/extent-tree.h
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/extent_map.c
fs/btrfs/file-item.c
fs/btrfs/file-item.h
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-tree.c
fs/btrfs/fs.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/messages.c
fs/btrfs/messages.h
fs/btrfs/ordered-data.c
fs/btrfs/print-tree.c
fs/btrfs/qgroup.c
fs/btrfs/raid56.c
fs/btrfs/raid56.h
fs/btrfs/reflink.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/space-info.c
fs/btrfs/super.c
fs/btrfs/sysfs.c
fs/btrfs/tests/extent-io-tests.c
fs/btrfs/tests/extent-map-tests.c
fs/btrfs/transaction.c
fs/btrfs/tree-checker.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/btrfs/xattr.c
fs/btrfs/zoned.c
fs/btrfs/zoned.h
fs/buffer.c
fs/cachefiles/io.c
fs/ceph/acl.c
fs/ceph/caps.c
fs/ceph/dir.c
fs/ceph/inode.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/ceph/metric.c
fs/ceph/snap.c
fs/ceph/super.c
fs/ceph/xattr.c
fs/coda/coda_linux.c
fs/coda/dir.c
fs/coda/file.c
fs/coda/inode.c
fs/configfs/inode.c
fs/cramfs/inode.c
fs/dcache.c
fs/debugfs/inode.c
fs/devpts/inode.c
fs/ecryptfs/crypto.c
fs/ecryptfs/inode.c
fs/ecryptfs/mmap.c
fs/ecryptfs/read_write.c
fs/efivarfs/file.c
fs/efivarfs/inode.c
fs/efs/inode.c
fs/erofs/Kconfig
fs/erofs/Makefile
fs/erofs/compress.h
fs/erofs/decompressor.c
fs/erofs/decompressor_deflate.c [new file with mode: 0644]
fs/erofs/erofs_fs.h
fs/erofs/inode.c
fs/erofs/internal.h
fs/erofs/super.c
fs/erofs/xattr.c
fs/erofs/zdata.c
fs/erofs/zmap.c
fs/eventfd.c
fs/eventpoll.c
fs/exfat/balloc.c
fs/exfat/dir.c
fs/exfat/exfat_fs.h
fs/exfat/file.c
fs/exfat/inode.c
fs/exfat/namei.c
fs/exfat/super.c
fs/exportfs/expfs.c
fs/ext2/acl.c
fs/ext2/dir.c
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/ioctl.c
fs/ext2/namei.c
fs/ext2/super.c
fs/ext2/xattr.c
fs/ext4/acl.c
fs/ext4/ext4.h
fs/ext4/ext4_jbd2.c
fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inline.c
fs/ext4/inode-test.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/f2fs/compress.c
fs/f2fs/dir.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/inode.c
fs/f2fs/namei.c
fs/f2fs/recovery.c
fs/f2fs/super.c
fs/f2fs/xattr.c
fs/fat/fat.h
fs/fat/file.c
fs/fat/inode.c
fs/fat/misc.c
fs/fcntl.c
fs/file.c
fs/file_table.c
fs/freevxfs/vxfs_inode.c
fs/fs-writeback.c
fs/fs_context.c
fs/fsopen.c
fs/fuse/control.c
fs/fuse/dir.c
fs/fuse/inode.c
fs/fuse/ioctl.c
fs/gfs2/acl.c
fs/gfs2/aops.c
fs/gfs2/bmap.c
fs/gfs2/dir.c
fs/gfs2/file.c
fs/gfs2/glops.c
fs/gfs2/inode.c
fs/gfs2/quota.c
fs/gfs2/super.c
fs/gfs2/sys.c
fs/gfs2/trans.c
fs/gfs2/xattr.c
fs/hfs/catalog.c
fs/hfs/dir.c
fs/hfs/inode.c
fs/hfs/sysdep.c
fs/hfsplus/catalog.c
fs/hfsplus/dir.c
fs/hfsplus/inode.c
fs/hostfs/hostfs_kern.c
fs/hpfs/dir.c
fs/hpfs/inode.c
fs/hpfs/namei.c
fs/hpfs/super.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/internal.h
fs/ioctl.c
fs/iomap/buffered-io.c
fs/iomap/direct-io.c
fs/isofs/inode.c
fs/isofs/rock.c
fs/jbd2/checkpoint.c
fs/jbd2/commit.c
fs/jbd2/transaction.c
fs/jffs2/dir.c
fs/jffs2/file.c
fs/jffs2/fs.c
fs/jffs2/os-linux.h
fs/jfs/acl.c
fs/jfs/inode.c
fs/jfs/ioctl.c
fs/jfs/jfs_imap.c
fs/jfs/jfs_inode.c
fs/jfs/namei.c
fs/jfs/super.c
fs/jfs/xattr.c
fs/kernel_read_file.c
fs/kernfs/dir.c
fs/kernfs/inode.c
fs/libfs.c
fs/locks.c
fs/minix/bitmap.c
fs/minix/dir.c
fs/minix/inode.c
fs/minix/itree_common.c
fs/minix/namei.c
fs/namei.c
fs/nfs/callback_proc.c
fs/nfs/direct.c
fs/nfs/fscache.h
fs/nfs/inode.c
fs/nfs/namespace.c
fs/nfs/nfs42proc.c
fs/nfs/nfs4_fs.h
fs/nfs/nfs4file.c
fs/nfs/nfs4proc.c
fs/nfs/sysfs.c
fs/nfsd/nfs4state.c
fs/nfsd/nfsctl.c
fs/nfsd/vfs.c
fs/nilfs2/dir.c
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/nilfs2/namei.c
fs/nilfs2/segment.c
fs/nilfs2/super.c
fs/nilfs2/the_nilfs.h
fs/nls/nls_base.c
fs/notify/dnotify/dnotify.c
fs/nsfs.c
fs/ntfs/dir.c
fs/ntfs/inode.c
fs/ntfs/mft.c
fs/ntfs3/file.c
fs/ntfs3/frecord.c
fs/ntfs3/inode.c
fs/ntfs3/namei.c
fs/ntfs3/super.c
fs/ntfs3/xattr.c
fs/ocfs2/acl.c
fs/ocfs2/alloc.c
fs/ocfs2/aops.c
fs/ocfs2/dir.c
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/dlmglue.c
fs/ocfs2/file.c
fs/ocfs2/inode.c
fs/ocfs2/journal.c
fs/ocfs2/move_extents.c
fs/ocfs2/namei.c
fs/ocfs2/refcounttree.c
fs/ocfs2/xattr.c
fs/omfs/dir.c
fs/omfs/inode.c
fs/open.c
fs/openpromfs/inode.c
fs/orangefs/inode.c
fs/orangefs/namei.c
fs/orangefs/orangefs-kernel.h
fs/orangefs/orangefs-utils.c
fs/overlayfs/file.c
fs/overlayfs/inode.c
fs/overlayfs/overlayfs.h
fs/overlayfs/readdir.c
fs/overlayfs/super.c
fs/overlayfs/util.c
fs/pipe.c
fs/posix_acl.c
fs/proc/base.c
fs/proc/fd.c
fs/proc/generic.c
fs/proc/inode.c
fs/proc/kcore.c
fs/proc/proc_net.c
fs/proc/proc_sysctl.c
fs/proc/root.c
fs/proc/self.c
fs/proc/task_mmu.c
fs/proc/thread_self.c
fs/proc/vmcore.c
fs/pstore/Kconfig
fs/pstore/inode.c
fs/pstore/platform.c
fs/pstore/ram.c
fs/pstore/ram_core.c
fs/qnx4/inode.c
fs/qnx6/inode.c
fs/quota/dquot.c
fs/ramfs/inode.c
fs/read_write.c
fs/readdir.c
fs/reiserfs/inode.c
fs/reiserfs/ioctl.c
fs/reiserfs/journal.c
fs/reiserfs/namei.c
fs/reiserfs/stree.c
fs/reiserfs/super.c
fs/reiserfs/xattr.c
fs/reiserfs/xattr_acl.c
fs/romfs/super.c
fs/smb/client/cifs_debug.c
fs/smb/client/cifsfs.c
fs/smb/client/cifsfs.h
fs/smb/client/cifsglob.h
fs/smb/client/cifssmb.c
fs/smb/client/connect.c
fs/smb/client/dfs.c
fs/smb/client/file.c
fs/smb/client/fs_context.c
fs/smb/client/fscache.h
fs/smb/client/inode.c
fs/smb/client/ioctl.c
fs/smb/client/misc.c
fs/smb/client/sess.c
fs/smb/client/smb2ops.c
fs/smb/client/smb2pdu.c
fs/smb/client/smb2transport.c
fs/smb/server/ksmbd_netlink.h
fs/smb/server/server.c
fs/smb/server/smb2misc.c
fs/smb/server/smb2pdu.c
fs/smb/server/smb_common.c
fs/smb/server/smb_common.h
fs/smb/server/vfs.c
fs/smb/server/vfs.h
fs/splice.c
fs/squashfs/inode.c
fs/stack.c
fs/stat.c
fs/super.c
fs/sysv/dir.c
fs/sysv/ialloc.c
fs/sysv/inode.c
fs/sysv/itree.c
fs/sysv/namei.c
fs/tracefs/inode.c
fs/ubifs/debug.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/ioctl.c
fs/ubifs/journal.c
fs/ubifs/super.c
fs/ubifs/ubifs.h
fs/ubifs/xattr.c
fs/udf/ialloc.c
fs/udf/inode.c
fs/udf/namei.c
fs/udf/symlink.c
fs/ufs/dir.c
fs/ufs/ialloc.c
fs/ufs/inode.c
fs/ufs/namei.c
fs/vboxsf/dir.c
fs/vboxsf/shfl_hostintf.h
fs/vboxsf/utils.c
fs/verity/fsverity_private.h
fs/verity/hash_algs.c
fs/verity/init.c
fs/verity/open.c
fs/verity/signature.c
fs/verity/verify.c
fs/xattr.c
fs/xfs/libxfs/xfs_da_format.h
fs/xfs/libxfs/xfs_fs.h
fs/xfs/libxfs/xfs_inode_buf.c
fs/xfs/libxfs/xfs_trans_inode.c
fs/xfs/scrub/fscounters.c
fs/xfs/scrub/scrub.c
fs/xfs/scrub/scrub.h
fs/xfs/scrub/trace.h
fs/xfs/xfs_acl.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_ondisk.h
fs/xfs/xfs_super.c
fs/zonefs/file.c
fs/zonefs/super.c
fs/zonefs/zonefs.h
include/acpi/acnames.h
include/acpi/acpi_bus.h
include/acpi/acpixf.h
include/acpi/actbl1.h
include/acpi/actbl2.h
include/acpi/actbl3.h
include/acpi/pdc_intel.h [deleted file]
include/acpi/platform/aclinux.h
include/acpi/platform/aczephyr.h
include/acpi/proc_cap_intel.h [new file with mode: 0644]
include/asm-generic/mshyperv.h
include/asm-generic/vmlinux.lds.h
include/asm-generic/word-at-a-time.h
include/drm/display/drm_dp.h
include/drm/drm_edid.h
include/drm/drm_fb_helper.h
include/drm/drm_probe_helper.h
include/drm/gpu_scheduler.h
include/kvm/arm_vgic.h
include/linux/acpi.h
include/linux/acpi_iort.h
include/linux/arm_sdei.h
include/linux/bio.h
include/linux/blk-crypto-profile.h
include/linux/blk-mq.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/cgroup-defs.h
include/linux/clk.h
include/linux/compiler_attributes.h
include/linux/compiler_types.h
include/linux/completion.h
include/linux/cpu.h
include/linux/cpu_smt.h [new file with mode: 0644]
include/linux/cpufreq.h
include/linux/cpuhotplug.h
include/linux/cpumask.h
include/linux/decompress/mm.h
include/linux/dm-verity-loadpin.h
include/linux/dma-fence.h
include/linux/dnotify.h
include/linux/efi.h
include/linux/filelock.h
include/linux/fs.h
include/linux/fs_context.h
include/linux/fs_stack.h
include/linux/ftrace.h
include/linux/huge_mm.h
include/linux/hyperv.h
include/linux/intel_rapl.h
include/linux/iomap.h
include/linux/ism.h
include/linux/jbd2.h
include/linux/list.h
include/linux/lsm_hook_defs.h
include/linux/mm.h
include/linux/mm_types.h
include/linux/mmap_lock.h
include/linux/nls.h
include/linux/notifier.h
include/linux/nsproxy.h
include/linux/nvme.h
include/linux/pagemap.h
include/linux/pagewalk.h
include/linux/pci_ids.h
include/linux/perf/arm_pmu.h
include/linux/perf_event.h
include/linux/pipe_fs_i.h
include/linux/pm_opp.h
include/linux/pm_runtime.h
include/linux/pm_wakeirq.h
include/linux/pm_wakeup.h
include/linux/prefetch.h
include/linux/psi.h
include/linux/psi_types.h
include/linux/raid_class.h
include/linux/rbtree_augmented.h
include/linux/rculist_nulls.h
include/linux/rcupdate_trace.h
include/linux/rcupdate_wait.h
include/linux/rethook.h
include/linux/sched.h
include/linux/sched/task.h
include/linux/security.h
include/linux/seq_file.h
include/linux/serial_core.h
include/linux/shmem_fs.h
include/linux/skmsg.h
include/linux/spi/corgi_lcd.h
include/linux/spi/spi-mem.h
include/linux/srcutiny.h
include/linux/swait.h
include/linux/syscalls.h
include/linux/tcp.h
include/linux/thermal.h
include/linux/torture.h
include/linux/tpm.h
include/linux/trace_events.h
include/linux/uio.h
include/linux/virtio_net.h
include/linux/wait.h
include/linux/writeback.h
include/linux/xattr.h
include/net/bluetooth/hci_core.h
include/net/bonding.h
include/net/cfg80211.h
include/net/cfg802154.h
include/net/codel.h
include/net/devlink.h
include/net/gro.h
include/net/inet_frag.h
include/net/inet_sock.h
include/net/ip.h
include/net/ipv6.h
include/net/llc_conn.h
include/net/llc_pdu.h
include/net/mac80211.h
include/net/netfilter/nf_conntrack_tuple.h
include/net/netfilter/nf_tables.h
include/net/nsh.h
include/net/pie.h
include/net/pkt_sched.h
include/net/route.h
include/net/rsi_91x.h
include/net/rtnetlink.h
include/net/sock.h
include/net/tcp.h
include/net/vxlan.h
include/net/xfrm.h
include/scsi/scsi_device.h
include/soc/mscc/ocelot.h
include/soc/tegra/mc.h
include/trace/events/btrfs.h
include/trace/events/erofs.h
include/trace/events/jbd2.h
include/trace/events/tcp.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/blkzoned.h
include/uapi/linux/btrfs_tree.h
include/uapi/linux/elf.h
include/uapi/linux/fuse.h
include/uapi/linux/if_packet.h
include/uapi/linux/mount.h
include/uapi/linux/perf_event.h
include/uapi/linux/pkt_cls.h
include/uapi/linux/quota.h
include/uapi/linux/seccomp.h
include/uapi/linux/stddef.h
include/uapi/scsi/scsi_bsg_ufs.h
include/uapi/xen/evtchn.h
include/uapi/xen/privcmd.h
include/ufs/ufs.h
include/video/kyro.h
include/xen/events.h
init/Kconfig
init/do_mounts.c
io_uring/io_uring.c
io_uring/openclose.c
io_uring/rw.c
ipc/mqueue.c
kernel/bpf/cpumap.c
kernel/bpf/inode.c
kernel/bpf/verifier.c
kernel/cgroup/cgroup.c
kernel/cpu.c
kernel/entry/common.c
kernel/events/core.c
kernel/events/ring_buffer.c
kernel/fork.c
kernel/irq/chip.c
kernel/irq/internals.h
kernel/irq/manage.c
kernel/irq/resend.c
kernel/kallsyms.c
kernel/kallsyms_selftest.c
kernel/kprobes.c
kernel/locking/locktorture.c
kernel/locking/qspinlock_paravirt.h
kernel/locking/rtmutex.c
kernel/locking/rtmutex_api.c
kernel/locking/rtmutex_common.h
kernel/locking/ww_mutex.h
kernel/nsproxy.c
kernel/power/hibernate.c
kernel/power/qos.c
kernel/power/snapshot.c
kernel/rcu/rcu.h
kernel/rcu/rcuscale.c
kernel/rcu/rcutorture.c
kernel/rcu/refscale.c
kernel/rcu/tasks.h
kernel/rcu/tree.c
kernel/rcu/tree_nocb.h
kernel/scftorture.c
kernel/sched/completion.c
kernel/sched/core.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/psi.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sched/swait.c
kernel/sched/topology.c
kernel/sched/wait.c
kernel/seccomp.c
kernel/signal.c
kernel/smp.c
kernel/softirq.c
kernel/sys.c
kernel/time/clocksource.c
kernel/torture.c
kernel/trace/bpf_trace.c
kernel/trace/fgraph.c
kernel/trace/fprobe.c
kernel/trace/ftrace.c
kernel/trace/ftrace_internal.h
kernel/trace/rethook.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_eprobe.c
kernel/trace/trace_events.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_events_synth.c
kernel/trace/trace_events_trigger.c
kernel/trace/trace_events_user.c
kernel/trace/trace_irqsoff.c
kernel/trace/trace_kprobe_selftest.c
kernel/trace/trace_probe.c
kernel/trace/trace_probe_kernel.h
kernel/trace/trace_probe_tmpl.h
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_seq.c
kernel/trace/trace_uprobe.c
kernel/trace/tracing_map.h
kernel/workqueue.c
lib/Kconfig.debug
lib/Kconfig.ubsan
lib/Makefile
lib/clz_ctz.c
lib/cpumask.c
lib/genalloc.c
lib/iov_iter.c
lib/list_debug.c
lib/locking-selftest.c
lib/maple_tree.c
lib/radix-tree.c
lib/sbitmap.c
lib/scatterlist.c
lib/test_bitmap.c
lib/test_maple_tree.c
mm/Makefile
mm/compaction.c
mm/damon/core-test.h
mm/damon/core.c
mm/damon/vaddr.c
mm/filemap.c
mm/folio-compat.c
mm/gup.c
mm/hmm.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/khugepaged.c
mm/ksm.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/mempolicy.c
mm/migrate_device.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/mprotect.c
mm/page-writeback.c
mm/pagewalk.c
mm/readahead.c
mm/shmem.c
mm/shmem_quota.c [new file with mode: 0644]
mm/swapfile.c
mm/truncate.c
mm/vmalloc.c
mm/vmscan.c
mm/zsmalloc.c
net/9p/client.c
net/9p/trans_virtio.c
net/batman-adv/bat_v_elp.c
net/batman-adv/bat_v_ogm.c
net/batman-adv/hard-interface.c
net/batman-adv/netlink.c
net/batman-adv/soft-interface.c
net/batman-adv/translation-table.c
net/batman-adv/types.h
net/bluetooth/hci_conn.c
net/bluetooth/hci_core.c
net/bluetooth/hci_event.c
net/bluetooth/hci_sync.c
net/bluetooth/iso.c
net/bluetooth/mgmt.c
net/bluetooth/sco.c
net/can/bcm.c
net/can/isotp.c
net/can/raw.c
net/ceph/messenger.c
net/ceph/messenger_v2.c
net/ceph/osd_client.c
net/core/bpf_sk_storage.c
net/core/filter.c
net/core/net-traces.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/skmsg.c
net/core/sock.c
net/core/sock_map.c
net/core/xdp.c
net/dcb/dcbnl.c
net/dccp/ipv4.c
net/dccp/ipv6.c
net/dccp/output.c
net/dccp/proto.c
net/devlink/leftover.c
net/dsa/port.c
net/ipv4/af_inet.c
net/ipv4/datagram.c
net/ipv4/esp4.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/inet_hashtables.c
net/ipv4/inet_timewait_sock.c
net/ipv4/ip_gre.c
net/ipv4/ip_output.c
net/ipv4/ip_sockglue.c
net/ipv4/ip_tunnel_core.c
net/ipv4/ip_vti.c
net/ipv4/nexthop.c
net/ipv4/raw.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_fastopen.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_metrics.c
net/ipv4/tcp_minisocks.c
net/ipv4/tcp_output.c
net/ipv4/tcp_timer.c
net/ipv4/udp.c
net/ipv4/udp_offload.c
net/ipv6/Kconfig
net/ipv6/addrconf.c
net/ipv6/icmp.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_vti.c
net/ipv6/ip6mr.c
net/ipv6/ndisc.c
net/ipv6/ping.c
net/ipv6/raw.c
net/ipv6/route.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp.c
net/ipv6/udp_offload.c
net/key/af_key.c
net/l2tp/l2tp_ip6.c
net/llc/af_llc.c
net/llc/llc_conn.c
net/llc/llc_if.c
net/llc/llc_input.c
net/llc/llc_sap.c
net/mac80211/rx.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/sockopt.c
net/mptcp/subflow.c
net/netfilter/ipvs/ip_vs_ctl.c
net/netfilter/nf_conntrack_core.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_proto_sctp.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_byteorder.c
net/netfilter/nft_dynset.c
net/netfilter/nft_flow_offload.c
net/netfilter/nft_immediate.c
net/netfilter/nft_objref.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_rbtree.c
net/netfilter/nft_socket.c
net/netfilter/xt_socket.c
net/openvswitch/datapath.c
net/packet/af_packet.c
net/sched/act_api.c
net/sched/cls_bpf.c
net/sched/cls_flower.c
net/sched/cls_fw.c
net/sched/cls_matchall.c
net/sched/cls_route.c
net/sched/cls_u32.c
net/sched/em_meta.c
net/sched/sch_api.c
net/sched/sch_mqprio.c
net/sched/sch_qfq.c
net/sched/sch_taprio.c
net/sctp/socket.c
net/smc/af_smc.c
net/smc/smc.h
net/smc/smc_clc.c
net/smc/smc_core.c
net/smc/smc_sysctl.c
net/sunrpc/rpc_pipe.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/verbs.c
net/tipc/crypto.c
net/tipc/node.c
net/tls/tls_device.c
net/tls/tls_main.c
net/unix/af_unix.c
net/wireless/nl80211.c
net/wireless/scan.c
net/wireless/util.c
net/xdp/xsk.c
net/xfrm/xfrm_compat.c
net/xfrm/xfrm_input.c
net/xfrm/xfrm_interface_core.c
net/xfrm/xfrm_policy.c
net/xfrm/xfrm_state.c
net/xfrm/xfrm_user.c
rust/Makefile
rust/bindings/bindings_helper.h
rust/kernel/allocator.rs
rust/kernel/sync/arc.rs
rust/kernel/types.rs
rust/macros/vtable.rs
samples/ftrace/ftrace-direct-modify.c
samples/ftrace/ftrace-direct-multi-modify.c
samples/ftrace/ftrace-direct-multi.c
samples/ftrace/ftrace-direct-too.c
samples/ftrace/ftrace-direct.c
scripts/Makefile.build
scripts/Makefile.host
scripts/checkpatch.pl
scripts/clang-tools/gen_compile_commands.py
scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci [deleted file]
scripts/gcc-plugins/gcc-common.h
scripts/kallsyms.c
scripts/kconfig/gconf.c
scripts/spelling.txt
security/Kconfig.hardening
security/apparmor/apparmorfs.c
security/apparmor/policy_unpack.c
security/inode.c
security/integrity/ima/ima_policy.c
security/integrity/platform_certs/load_ipl_s390.c
security/keys/keyctl.c
security/keys/request_key.c
security/keys/sysctl.c
security/keys/trusted-keys/trusted_tpm2.c
security/loadpin/loadpin.c
security/security.c
security/selinux/hooks.c
security/selinux/selinuxfs.c
security/selinux/ss/policydb.c
security/smack/smack_lsm.c
sound/core/seq/seq_ports.c
sound/core/seq/seq_ump_client.c
sound/drivers/pcmtest.c
sound/pci/hda/patch_cs8409-tables.c
sound/pci/hda/patch_realtek.c
sound/pci/ymfpci/ymfpci.c
sound/soc/amd/acp/amd.h
sound/soc/amd/ps/acp63.h
sound/soc/amd/ps/pci-ps.c
sound/soc/amd/ps/ps-sdw-dma.c
sound/soc/amd/yc/acp6x-mach.c
sound/soc/atmel/atmel-i2s.c
sound/soc/codecs/Kconfig
sound/soc/codecs/cs35l41.c
sound/soc/codecs/cs35l56-i2c.c
sound/soc/codecs/cs35l56-spi.c
sound/soc/codecs/cs35l56.c
sound/soc/codecs/cs42l51-i2c.c
sound/soc/codecs/cs42l51.c
sound/soc/codecs/cs42l51.h
sound/soc/codecs/da7219-aad.c
sound/soc/codecs/es8316.c
sound/soc/codecs/max98363.c
sound/soc/codecs/nau8821.c
sound/soc/codecs/rt1308-sdw.c
sound/soc/codecs/rt5640.c
sound/soc/codecs/rt5645.c
sound/soc/codecs/rt5665.c
sound/soc/codecs/rt5682-sdw.c
sound/soc/codecs/rt711-sdca-sdw.c
sound/soc/codecs/rt711-sdw.c
sound/soc/codecs/rt712-sdca-sdw.c
sound/soc/codecs/rt722-sdca-sdw.c
sound/soc/codecs/tas2781-comlib.c
sound/soc/codecs/wcd-mbhc-v2.c
sound/soc/codecs/wcd934x.c
sound/soc/codecs/wcd938x.c
sound/soc/codecs/wm8904.c
sound/soc/fsl/fsl_micfil.c
sound/soc/fsl/fsl_micfil.h
sound/soc/fsl/fsl_sai.c
sound/soc/fsl/fsl_sai.h
sound/soc/fsl/fsl_spdif.c
sound/soc/intel/boards/sof_sdw.c
sound/soc/intel/boards/sof_sdw_cs42l42.c
sound/soc/meson/axg-tdm-formatter.c
sound/soc/qcom/qdsp6/q6afe-dai.c
sound/soc/qcom/qdsp6/q6apm-dai.c
sound/soc/qcom/qdsp6/q6apm.c
sound/soc/qcom/qdsp6/topology.c
sound/soc/soc-core.c
sound/soc/soc-pcm.c
sound/soc/soc-topology.c
sound/soc/sof/amd/acp.c
sound/soc/sof/intel/hda-dai-ops.c
sound/soc/sof/intel/hda-dai.c
sound/soc/sof/intel/hda.h
sound/soc/sof/ipc3-dtrace.c
sound/soc/sof/ipc3.c
sound/soc/sof/ipc4-pcm.c
sound/soc/sof/ipc4-topology.c
sound/soc/tegra/tegra210_adx.c
sound/soc/tegra/tegra210_amx.c
sound/usb/mixer_maps.c
sound/usb/quirks-table.h
sound/usb/quirks.c
tools/arch/arm64/include/asm/cputype.h
tools/arch/arm64/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
tools/arch/riscv/include/uapi/asm/bitsperlong.h [new file with mode: 0644]
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/msr-index.h
tools/arch/x86/include/uapi/asm/unistd_32.h
tools/arch/x86/include/uapi/asm/unistd_64.h
tools/build/feature/Makefile
tools/counter/Makefile
tools/hv/vmbus_testing
tools/include/linux/compiler.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/fcntl.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/mman.h
tools/include/uapi/linux/mount.h
tools/include/uapi/linux/prctl.h
tools/include/uapi/linux/vhost.h
tools/include/uapi/sound/asound.h
tools/lib/subcmd/help.c
tools/net/ynl/lib/ynl.py
tools/objtool/arch/x86/decode.c
tools/objtool/check.c
tools/objtool/elf.c
tools/objtool/include/objtool/arch.h
tools/objtool/include/objtool/elf.h
tools/perf/Makefile.config
tools/perf/arch/arm64/util/pmu.c
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
tools/perf/arch/powerpc/util/skip-callchain-idx.c
tools/perf/arch/s390/entry/syscalls/syscall.tbl
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/bench/Build
tools/perf/bench/bench.h
tools/perf/bench/sched-seccomp-notify.c [new file with mode: 0644]
tools/perf/builtin-bench.c
tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
tools/perf/tests/parse-events.c
tools/perf/tests/shell/test_uprobe_from_different_cu.sh [new file with mode: 0755]
tools/perf/tests/task-exit.c
tools/perf/trace/beauty/include/linux/socket.h
tools/perf/trace/beauty/move_mount_flags.sh
tools/perf/trace/beauty/msg_flags.c
tools/perf/util/dwarf-aux.c
tools/perf/util/machine.c
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/pmus.c
tools/perf/util/stat-display.c
tools/perf/util/thread-stack.c
tools/power/x86/turbostat/turbostat.c
tools/testing/cxl/test/cxl.c
tools/testing/radix-tree/maple.c
tools/testing/radix-tree/regression1.c
tools/testing/selftests/Makefile
tools/testing/selftests/alsa/.gitignore
tools/testing/selftests/alsa/test-pcmtest-driver.c
tools/testing/selftests/arm64/Makefile
tools/testing/selftests/arm64/abi/hwcap.c
tools/testing/selftests/arm64/abi/syscall-abi.c
tools/testing/selftests/arm64/bti/Makefile
tools/testing/selftests/arm64/bti/compiler.h [deleted file]
tools/testing/selftests/arm64/bti/gen/.gitignore [deleted file]
tools/testing/selftests/arm64/bti/system.c
tools/testing/selftests/arm64/bti/system.h
tools/testing/selftests/arm64/bti/test.c
tools/testing/selftests/arm64/fp/vec-syscfg.c
tools/testing/selftests/arm64/signal/test_signals_utils.h
tools/testing/selftests/arm64/signal/testcases/zt_regs.c
tools/testing/selftests/bpf/prog_tests/async_stack_depth.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/progs/async_stack_depth.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_sockmap_listen.c
tools/testing/selftests/cachestat/test_cachestat.c
tools/testing/selftests/cgroup/test_kmem.c
tools/testing/selftests/drivers/net/bonding/Makefile
tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh [new file with mode: 0755]
tools/testing/selftests/drivers/net/bonding/bond_options.sh
tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh [new file with mode: 0644]
tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
tools/testing/selftests/fchmodat2/.gitignore [moved from tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore with 83% similarity]
tools/testing/selftests/fchmodat2/Makefile [new file with mode: 0644]
tools/testing/selftests/fchmodat2/fchmodat2_test.c [new file with mode: 0644]
tools/testing/selftests/filelock/Makefile [new file with mode: 0644]
tools/testing/selftests/filelock/ofdlocks.c [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc [new file with mode: 0644]
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
tools/testing/selftests/hid/vmtest.sh
tools/testing/selftests/kselftest_harness.h
tools/testing/selftests/kvm/include/kvm_util_base.h
tools/testing/selftests/kvm/kvm_binary_stats_test.c
tools/testing/selftests/kvm/x86_64/set_sregs_test.c
tools/testing/selftests/mincore/mincore_selftest.c
tools/testing/selftests/mm/charge_reserved_hugetlb.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/check_config.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/hmm-tests.c
tools/testing/selftests/mm/hugetlb_reparenting_test.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/ksm_tests.c
tools/testing/selftests/mm/mkdirty.c
tools/testing/selftests/mm/run_vmtests.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/test_hmm.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/test_vmalloc.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/va_high_addr_switch.sh [changed mode: 0644->0755]
tools/testing/selftests/mm/write_hugetlb_memory.sh [changed mode: 0644->0755]
tools/testing/selftests/net/.gitignore
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/forwarding/bridge_mdb.sh
tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
tools/testing/selftests/net/forwarding/ethtool.sh
tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
tools/testing/selftests/net/forwarding/ethtool_mm.sh
tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
tools/testing/selftests/net/forwarding/lib.sh
tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
tools/testing/selftests/net/forwarding/settings [new file with mode: 0644]
tools/testing/selftests/net/forwarding/tc_actions.sh
tools/testing/selftests/net/forwarding/tc_flower.sh
tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/pmtu.sh
tools/testing/selftests/net/so_incoming_cpu.c
tools/testing/selftests/nolibc/nolibc-test.c
tools/testing/selftests/rcutorture/bin/configcheck.sh
tools/testing/selftests/rcutorture/bin/functions.sh
tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
tools/testing/selftests/rcutorture/bin/kvm-remote.sh
tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
tools/testing/selftests/rcutorture/bin/kvm.sh
tools/testing/selftests/rcutorture/bin/mkinitrd.sh
tools/testing/selftests/rcutorture/bin/torture.sh
tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
tools/testing/selftests/rcutorture/configs/rcu/TASKS03
tools/testing/selftests/rcutorture/configs/rcu/TREE01
tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c [deleted file]
tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh [deleted file]
tools/testing/selftests/riscv/Makefile
tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
tools/testing/selftests/rseq/Makefile
tools/testing/selftests/rseq/rseq.c
tools/testing/selftests/seccomp/seccomp_bpf.c
tools/testing/selftests/tc-testing/config
tools/testing/selftests/tc-testing/settings [new file with mode: 0644]
tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
tools/testing/selftests/timers/raw_skew.c
tools/testing/selftests/user_events/dyn_test.c
tools/testing/vsock/Makefile
virt/kvm/kvm_main.c

index 1bce47a..e506625 100644
--- a/.mailmap
+++ b/.mailmap
@@ -13,7 +13,9 @@
 Aaron Durbin <adurbin@google.com>
 Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
 Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
+Abhijeet Dharmapurikar <quic_adharmap@quicinc.com> <adharmap@codeaurora.org>
 Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
+Ahmad Masri <quic_amasri@quicinc.com> <amasri@codeaurora.org>
 Adam Oldham <oldhamca@gmail.com>
 Adam Radford <aradford@gmail.com>
 Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com>
@@ -30,6 +32,7 @@ Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo
 Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
 Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
 Alexandre Ghiti <alex@ghiti.fr> <alexandre.ghiti@canonical.com>
+Alexei Avshalom Lazar <quic_ailizaro@quicinc.com> <ailizaro@codeaurora.org>
 Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
 Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
 Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
@@ -37,8 +40,11 @@ Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
 Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
 Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
 Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com>
+Aloka Dixit <quic_alokad@quicinc.com> <alokad@codeaurora.org>
 Al Viro <viro@ftp.linux.org.uk>
 Al Viro <viro@zenIV.linux.org.uk>
+Amit Blay <quic_ablay@quicinc.com> <ablay@codeaurora.org>
+Amit Nischal <quic_anischal@quicinc.com> <anischal@codeaurora.org>
 Andi Kleen <ak@linux.intel.com> <ak@suse.de>
 Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
 Andreas Herrmann <aherrman@de.ibm.com>
@@ -54,6 +60,8 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
 Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
 André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
 Andy Adamson <andros@citi.umich.edu>
+Anilkumar Kolli <quic_akolli@quicinc.com> <akolli@codeaurora.org>
+Anirudh Ghayal <quic_aghayal@quicinc.com> <aghayal@codeaurora.org>
 Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
 Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
 Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
@@ -62,9 +70,17 @@ Archit Taneja <archit@ti.com>
 Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
 Arnaud Patard <arnaud.patard@rtp-net.org>
 Arnd Bergmann <arnd@arndb.de>
+Arun Kumar Neelakantam <quic_aneela@quicinc.com> <aneela@codeaurora.org>
+Ashok Raj Nagarajan <quic_arnagara@quicinc.com> <arnagara@codeaurora.org>
+Ashwin Chaugule <quic_ashwinc@quicinc.com> <ashwinc@codeaurora.org>
+Asutosh Das <quic_asutoshd@quicinc.com> <asutoshd@codeaurora.org>
 Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
+Avaneesh Kumar Dwivedi <quic_akdwived@quicinc.com> <akdwived@codeaurora.org>
 Axel Dyks <xl@xlsigned.net>
 Axel Lin <axel.lin@gmail.com>
+Balakrishna Godavarthi <quic_bgodavar@quicinc.com> <bgodavar@codeaurora.org>
+Banajit Goswami <quic_bgoswami@quicinc.com> <bgoswami@codeaurora.org>
+Baochen Qiang <quic_bqiang@quicinc.com> <bqiang@codeaurora.org>
 Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org>
 Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com>
 Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
@@ -93,12 +109,15 @@ Brian Avery <b.avery@hp.com>
 Brian King <brking@us.ibm.com>
 Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
 Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
+Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
+Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
 Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
 Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
 Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
 Chao Yu <chao@kernel.org> <yuchao0@huawei.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
+Chris Lew <quic_clew@quicinc.com> <clew@codeaurora.org>
 Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
 Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
 Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
@@ -119,7 +138,10 @@ Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
 Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
 Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
 David Brownell <david-b@pacbell.net>
+David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
 David Woodhouse <dwmw2@shinybook.infradead.org>
+Dedy Lansky <quic_dlansky@quicinc.com> <dlansky@codeaurora.org>
+Deepak Kumar Singh <quic_deesin@quicinc.com> <deesin@codeaurora.org>
 Dengcheng Zhu <dzhu@wavecomp.com> <dczhu@mips.com>
 Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@gmail.com>
 Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@imgtec.com>
@@ -136,6 +158,7 @@ Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com>
 Domen Puncer <domen@coderock.org>
 Douglas Gilbert <dougg@torque.net>
 Ed L. Cashin <ecashin@coraid.com>
+Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
 Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
 Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
 Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
@@ -148,6 +171,7 @@ Faith Ekstrand <faith.ekstrand@collabora.com> <jason.ekstrand@collabora.com>
 Felipe W Damasio <felipewd@terra.com.br>
 Felix Kuhling <fxkuehl@gmx.de>
 Felix Moeller <felix@derklecks.de>
+Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org>
 Filipe Lautert <filipe@icewall.org>
 Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
 Franck Bui-Huu <vagabon.xyz@gmail.com>
@@ -171,8 +195,11 @@ Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
 Gregory CLEMENT <gregory.clement@bootlin.com> <gregory.clement@free-electrons.com>
 Guilherme G. Piccoli <kernel@gpiccoli.net> <gpiccoli@linux.vnet.ibm.com>
 Guilherme G. Piccoli <kernel@gpiccoli.net> <gpiccoli@canonical.com>
+Gokul Sriram Palanisamy <quic_gokulsri@quicinc.com> <gokulsri@codeaurora.org>
+Govindaraj Saminathan <quic_gsamin@quicinc.com> <gsamin@codeaurora.org>
 Guo Ren <guoren@kernel.org> <guoren@linux.alibaba.com>
 Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
+Guru Das Srinagesh <quic_gurus@quicinc.com> <gurus@codeaurora.org>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
 Gustavo Padovan <padovan@profusion.mobi>
 Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
@@ -190,6 +217,7 @@ Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
 J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
 J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
 Jacob Shin <Jacob.Shin@amd.com>
+Jack Pham <quic_jackp@quicinc.com> <jackp@codeaurora.org>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>
 Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@motorola.com>
@@ -217,10 +245,12 @@ Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com>
 Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
 <jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
 Jean Tourrilhes <jt@hpl.hp.com>
+Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org>
 Jeff Garzik <jgarzik@pretzel.yyz.us>
 Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
 Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
 Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
+Jeffrey Hugo <quic_jhugo@quicinc.com> <jhugo@codeaurora.org>
 Jens Axboe <axboe@kernel.dk> <axboe@suse.de>
 Jens Axboe <axboe@kernel.dk> <jens.axboe@oracle.com>
 Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
@@ -228,6 +258,7 @@ Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
 Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
 Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
+Jilai Wang <quic_jilaiw@quicinc.com> <jilaiw@codeaurora.org>
 Jiri Pirko <jiri@resnulli.us> <jiri@nvidia.com>
 Jiri Pirko <jiri@resnulli.us> <jiri@mellanox.com>
 Jiri Pirko <jiri@resnulli.us> <jpirko@redhat.com>
@@ -238,14 +269,17 @@ Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.cz>
 Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
 Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
 Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
+Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
 Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
 Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
 John Crispin <john@phrozen.org> <blogic@openwrt.org>
+John Fastabend <john.fastabend@gmail.com> <john.r.fastabend@intel.com>
 John Keeping <john@keeping.me.uk> <john@metanate.com>
 John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
 John Stultz <johnstul@us.ibm.com>
 <jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
 <jon.toppins+linux@gmail.com> <jtoppins@redhat.com>
+Jonas Gorski <jonas.gorski@gmail.com> <jogo@openwrt.org>
 Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
 <josh@joshtriplett.org> <josh@freedesktop.org>
 <josh@joshtriplett.org> <josh@kernel.org>
@@ -254,6 +288,7 @@ Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
 <josh@joshtriplett.org> <josht@vnet.ibm.com>
 Josh Poimboeuf <jpoimboe@kernel.org> <jpoimboe@redhat.com>
 Josh Poimboeuf <jpoimboe@kernel.org> <jpoimboe@us.ibm.com>
+Jouni Malinen <quic_jouni@quicinc.com> <jouni@codeaurora.org>
 Juha Yrjola <at solidboot.com>
 Juha Yrjola <juha.yrjola@nokia.com>
 Juha Yrjola <juha.yrjola@solidboot.com>
@@ -261,6 +296,8 @@ Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
 Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
 Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
 Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
+Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
+Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
 Kay Sievers <kay.sievers@vrfy.org>
 Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
 Kees Cook <keescook@chromium.org> <keescook@google.com>
@@ -269,6 +306,8 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
 Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
 Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
 Kenneth W Chen <kenneth.w.chen@intel.com>
+Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
+Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
 Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
 Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
 Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
@@ -277,6 +316,7 @@ Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
 Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
 Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
 Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
+Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
 Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
 Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
 Lee Jones <lee@kernel.org> <joneslee@google.com>
@@ -290,19 +330,27 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
 Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
 Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
 Leon Romanovsky <leon@kernel.org> <leonro@nvidia.com>
+Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
 Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 <linux-hardening@vger.kernel.org> <kernel-hardening@lists.openwall.com>
 Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
 Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
+Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
 Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
 Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
 Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
+Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
 Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
 Maciej W. Rozycki <macro@orcam.me.uk> <macro@linux-mips.org>
+Maharaja Kennadyrajan <quic_mkenna@quicinc.com> <mkenna@codeaurora.org>
+Maheshwar Ajja <quic_majja@quicinc.com> <majja@codeaurora.org>
+Malathi Gottam <quic_mgottam@quicinc.com> <mgottam@codeaurora.org>
+Manikanta Pubbisetty <quic_mpubbise@quicinc.com> <mpubbise@codeaurora.org>
 Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
 Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
+Manoj Basapathi <quic_manojbm@quicinc.com> <manojbm@codeaurora.org>
 Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
 Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
 Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
@@ -332,6 +380,7 @@ Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting>
 Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com>
 Matt Ranostay <mranostay@gmail.com> <matt.ranostay@intel.com>
 Matt Redfearn <matt.redfearn@mips.com> <matt.redfearn@imgtec.com>
+Maulik Shah <quic_mkshah@quicinc.com> <mkshah@codeaurora.org>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@infradead.org>
@@ -344,7 +393,10 @@ Maxim Mikityanskiy <maxtram95@gmail.com> <maximmi@nvidia.com>
 Maxime Ripard <mripard@kernel.org> <maxime@cerno.tech>
 Maxime Ripard <mripard@kernel.org> <maxime.ripard@bootlin.com>
 Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
+Maya Erez <quic_merez@quicinc.com> <merez@codeaurora.org>
 Mayuresh Janorkar <mayur@ti.com>
+Md Sadre Alam <quic_mdalam@quicinc.com> <mdalam@codeaurora.org>
+Miaoqing Pan <quic_miaoqing@quicinc.com> <miaoqing@codeaurora.org>
 Michael Buesch <m@bues.ch>
 Michal Simek <michal.simek@amd.com> <michal.simek@xilinx.com>
 Michel Dänzer <michel@tungstengraphics.com>
@@ -355,6 +407,7 @@ Miguel Ojeda <ojeda@kernel.org> <miguel.ojeda.sandonis@gmail.com>
 Mike Rapoport <rppt@kernel.org> <mike@compulab.co.il>
 Mike Rapoport <rppt@kernel.org> <mike.rapoport@gmail.com>
 Mike Rapoport <rppt@kernel.org> <rppt@linux.ibm.com>
+Mike Tipton <quic_mdtipton@quicinc.com> <mdtipton@codeaurora.org>
 Miodrag Dinic <miodrag.dinic@mips.com> <miodrag.dinic@imgtec.com>
 Miquel Raynal <miquel.raynal@bootlin.com> <miquel.raynal@free-electrons.com>
 Mitesh shah <mshah@teja.com>
@@ -363,9 +416,13 @@ Morten Welinder <terra@gnome.org>
 Morten Welinder <welinder@anemone.rentec.com>
 Morten Welinder <welinder@darter.rentec.com>
 Morten Welinder <welinder@troll.com>
+Mukesh Ojha <quic_mojha@quicinc.com> <mojha@codeaurora.org>
+Muna Sinada <quic_msinada@quicinc.com> <msinada@codeaurora.org>
+Murali Nalajala <quic_mnalajal@quicinc.com> <mnalajal@codeaurora.org>
 Mythri P K <mythripk@ti.com>
 Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
 Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
+Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
 Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
 Nguyen Anh Quynh <aquynh@gmail.com>
 Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
@@ -384,6 +441,7 @@ Nikolay Aleksandrov <razor@blackwall.org> <nikolay@redhat.com>
 Nikolay Aleksandrov <razor@blackwall.org> <nikolay@cumulusnetworks.com>
 Nikolay Aleksandrov <razor@blackwall.org> <nikolay@nvidia.com>
 Nikolay Aleksandrov <razor@blackwall.org> <nikolay@isovalent.com>
+Odelu Kukatla <quic_okukatla@quicinc.com> <okukatla@codeaurora.org>
 Oleksandr Natalenko <oleksandr@natalenko.name> <oleksandr@redhat.com>
 Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
 Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
@@ -391,6 +449,7 @@ Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
 Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
 Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
 Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
+Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
 Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>
 Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
 Patrick Mochel <mochel@digitalimplant.org>
@@ -402,11 +461,14 @@ Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.vnet.ibm.com>
 Paul E. McKenney <paulmck@kernel.org> <paulmck@us.ibm.com>
 Paul Mackerras <paulus@ozlabs.org> <paulus@samba.org>
 Paul Mackerras <paulus@ozlabs.org> <paulus@au1.ibm.com>
+Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org>
 Peter A Jonsson <pj@ludd.ltu.se>
 Peter Oruba <peter.oruba@amd.com>
 Peter Oruba <peter@oruba.de>
 Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
 Praveen BP <praveenbp@ti.com>
+Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com> <pradeepc@codeaurora.org>
+Prasad Sodagudi <quic_psodagud@quicinc.com> <psodagud@codeaurora.org>
 Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
 Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com>
 Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com>
@@ -415,10 +477,16 @@ Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
 Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
 Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org>
 Rajendra Nayak <quic_rjendra@quicinc.com> <rnayak@codeaurora.org>
+Rajeshwari Ravindra Kamble <quic_rkambl@quicinc.com> <rkambl@codeaurora.org>
+Raju P.L.S.S.S.N <quic_rplsssn@quicinc.com> <rplsssn@codeaurora.org>
 Rajesh Shah <rajesh.shah@intel.com>
+Rakesh Pillai <quic_pillair@quicinc.com> <pillair@codeaurora.org>
 Ralf Baechle <ralf@linux-mips.org>
 Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
+Ram Chandra Jangir <quic_rjangir@quicinc.com> <rjangir@codeaurora.org>
 Randy Dunlap <rdunlap@infradead.org> <rdunlap@xenotime.net>
+Ravi Kumar Bokka <quic_rbokka@quicinc.com> <rbokka@codeaurora.org>
+Ravi Kumar Siddojigari <quic_rsiddoji@quicinc.com> <rsiddoji@codeaurora.org>
 Rémi Denis-Courmont <rdenis@simphalempin.com>
 Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
 Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
@@ -427,6 +495,7 @@ Richard Leitner <richard.leitner@linux.dev> <dev@g0hl1n.net>
 Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net>
 Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com>
 Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org>
+Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org>
 Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
 Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
 Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
@@ -444,22 +513,37 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Santosh Shilimkar <ssantosh@kernel.org>
 Sarangdhar Joshi <spjoshi@codeaurora.org>
 Sascha Hauer <s.hauer@pengutronix.de>
+Sahitya Tummala <quic_stummala@quicinc.com> <stummala@codeaurora.org>
+Sathishkumar Muruganandam <quic_murugana@quicinc.com> <murugana@codeaurora.org>
 Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org>
 S.Çağlar Onur <caglar@pardus.org.tr>
+Sayali Lokhande <quic_sayalil@quicinc.com> <sayalil@codeaurora.org>
 Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
 Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
+Sean Tranchetti <quic_stranche@quicinc.com> <stranche@codeaurora.org>
 Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
 Sebastian Reichel <sre@kernel.org> <sre@debian.org>
 Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
+Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
 Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
 Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
+Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
+Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
+Sharath Chandra Vurukala <quic_sharathv@quicinc.com> <sharathv@codeaurora.org>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
 Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
 Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
 Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com>
 Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
+Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
+Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
 Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
+Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
+Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
 Simon Kelley <simon@thekelleys.org.uk>
+Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
+Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
+Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
 Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
 Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
 Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
@@ -467,22 +551,30 @@ Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
 Stephen Hemminger <stephen@networkplumber.org> <sthemmin@vyatta.com>
 Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
 Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
-Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com> <subashab@codeaurora.org>
+Subbaraman Narayanamurthy <quic_subbaram@quicinc.com> <subbaram@codeaurora.org>
 Subhash Jadavani <subhashj@codeaurora.org>
+Sudarshan Rajagopalan <quic_sudaraja@quicinc.com> <sudaraja@codeaurora.org>
 Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
 Sumit Semwal <sumit.semwal@ti.com>
+Surabhi Vishnoi <quic_svishnoi@quicinc.com> <svishnoi@codeaurora.org>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
+Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org>
+Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>
 Tejun Heo <htejun@gmail.com>
 Thomas Graf <tgraf@suug.ch>
 Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
 Thomas Pedersen <twp@codeaurora.org>
 Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
+Tingwei Zhang <quic_tingwei@quicinc.com> <tingwei@codeaurora.org>
+Tirupathi Reddy <quic_tirupath@quicinc.com> <tirupath@codeaurora.org>
 Tobias Klauser <tklauser@distanz.ch> <tobias.klauser@gmail.com>
 Tobias Klauser <tklauser@distanz.ch> <klto@zhaw.ch>
 Tobias Klauser <tklauser@distanz.ch> <tklauser@nuerscht.ch>
 Tobias Klauser <tklauser@distanz.ch> <tklauser@xenon.tklauser.home>
 Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org>
 Tony Luck <tony.luck@intel.com>
+Trilok Soni <quic_tsoni@quicinc.com> <tsoni@codeaurora.org>
 TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
 TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn>
 Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
@@ -495,11 +587,17 @@ Uwe Kleine-König <ukleinek@strlen.de>
 Uwe Kleine-König <ukl@pengutronix.de>
 Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
 Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
+Vara Reddy <quic_varar@quicinc.com> <varar@codeaurora.org>
+Varadarajan Narayanan <quic_varada@quicinc.com> <varada@codeaurora.org>
+Vasanthakumar Thiagarajan <quic_vthiagar@quicinc.com> <vthiagar@codeaurora.org>
 Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
 Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
 Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
 Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
+Veera Sundaram Sankaran <quic_veeras@quicinc.com> <veeras@codeaurora.org>
+Veerabhadrarao Badiganti <quic_vbadigan@quicinc.com> <vbadigan@codeaurora.org>
+Venkateswara Naralasetty <quic_vnaralas@quicinc.com> <vnaralas@codeaurora.org>
 Vikash Garodia <quic_vgarodia@quicinc.com> <vgarodia@codeaurora.org>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
 Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
@@ -509,11 +607,14 @@ Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
 Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
 Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
 Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
+Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org>
 Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
 Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
 Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
 WeiXiong Liao <gmpy.liaowx@gmail.com> <liaoweixiong@allwinnertech.com>
+Wen Gong <quic_wgong@quicinc.com> <wgong@codeaurora.org>
+Wesley Cheng <quic_wcheng@quicinc.com> <wcheng@codeaurora.org>
 Will Deacon <will@kernel.org> <will.deacon@arm.com>
 Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
 Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
index 6350dd8..087f762 100644 (file)
@@ -82,7 +82,12 @@ Description:
                whether it resides in persistent capacity, volatile capacity,
                or the LSA, is made permanently unavailable by whatever means
                is appropriate for the media type. This functionality requires
-               the device to be not be actively decoding any HPA ranges.
+               the device to be disabled, that is, not actively decoding any
+               HPA ranges. This permits avoiding explicit global CPU cache
+               management, relying instead on it being done when a region
+               transitions between software programmed and hardware committed
+               states. If this file is not present, then there is no hardware
+               support for the operation.
 
 
 What            /sys/bus/cxl/devices/memX/security/erase
@@ -92,7 +97,13 @@ Contact:        linux-cxl@vger.kernel.org
 Description:
                (WO) Write a boolean 'true' string value to this attribute to
                secure erase user data by changing the media encryption keys for
-               all user data areas of the device.
+               all user data areas of the device. This functionality requires
+               the device to be disabled, that is, not actively decoding any
+               HPA ranges. This permits avoiding explicit global CPU cache
+               management, relying instead on it being done when a region
+               transitions between software programmed and hardware committed
+               states. If this file is not present, then there is no hardware
+               support for the operation.
 
 
 What:          /sys/bus/cxl/devices/memX/firmware/
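
Both security attributes above follow the same contract: they are
write-only, they are only honored while the device is not actively
decoding any HPA ranges, and they are absent entirely when the hardware
lacks support. A minimal userspace sketch of that contract (the mem0
path and the error handling are illustrative assumptions, not part of
the ABI)::

  /* Hedged sketch: request a CXL secure erase from userspace.
   * Assumes memX has already been disabled (no active HPA decode).
   */
  #include <errno.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          const char *path = "/sys/bus/cxl/devices/mem0/security/erase";
          FILE *f = fopen(path, "w");

          if (!f) {
                  /* ENOENT: no hardware support for the operation */
                  fprintf(stderr, "open %s: %s\n", path, strerror(errno));
                  return 1;
          }
          /* (WO) attribute: write a boolean 'true' string value */
          if (fputs("1\n", f) == EOF || fclose(f) == EOF) {
                  fprintf(stderr, "write: %s\n", strerror(errno));
                  return 1;
          }
          return 0;
  }
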
index 78b62a2..f6d9d72 100644 (file)
@@ -13,7 +13,7 @@ Description:
                Specifies the duration of the LED blink in milliseconds.
                Defaults to 50 ms.
 
-               With hw_control ON, the interval value MUST be set to the
+               When offloaded is true, the interval value MUST be set to the
                default value and cannot be changed.
                Trying to set any value in this specific mode will return
                an EINVAL error.
@@ -44,8 +44,8 @@ Description:
                If set to 1, the LED will blink for the milliseconds specified
                in interval to signal transmission.
 
-               With hw_control ON, the blink interval is controlled by hardware
-               and won't reflect the value set in interval.
+               When offloaded is true, the blink interval is controlled by
+               hardware and won't reflect the value set in interval.
 
 What:          /sys/class/leds/<led>/rx
 Date:          Dec 2017
@@ -59,21 +59,21 @@ Description:
                If set to 1, the LED will blink for the milliseconds specified
                in interval to signal reception.
 
-               With hw_control ON, the blink interval is controlled by hardware
-               and won't reflect the value set in interval.
+               When offloaded is true, the blink interval is controlled by
+               hardware and won't reflect the value set in interval.
 
-What:          /sys/class/leds/<led>/hw_control
+What:          /sys/class/leds/<led>/offloaded
 Date:          Jun 2023
 KernelVersion: 6.5
 Contact:       linux-leds@vger.kernel.org
 Description:
-               Communicate whether the LED trigger modes are driven by hardware
-               or software fallback is used.
+               Communicate whether the LED trigger modes are offloaded to
+               hardware or whether software fallback is used.
 
                If 0, the LED is using software fallback to blink.
 
-               If 1, the LED is using hardware control to blink and signal the
-               requested modes.
+               If 1, the LED blinking in the requested mode is offloaded to
+               hardware.
 
 What:          /sys/class/leds/<led>/link_10
 Date:          Jun 2023
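
With the rename above, consumers should read 'offloaded' before touching
'interval', since interval writes fail with EINVAL while blinking is
offloaded to hardware. A hedged sketch of that check (the LED name is an
illustrative assumption)::

  /* Read /sys/class/leds/<led>/offloaded and report whether the blink
   * interval is adjustable (software fallback) or fixed (offloaded).
   */
  #include <stdio.h>

  int main(void)
  {
          FILE *f = fopen("/sys/class/leds/eth0-led/offloaded", "r");
          int offloaded;

          if (!f || fscanf(f, "%d", &offloaded) != 1)
                  return 1;
          fclose(f);

          if (offloaded)
                  puts("offloaded to hardware; interval is fixed");
          else
                  puts("software fallback; interval may be changed");
          return 0;
  }
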
index ecd585c..183a07c 100644 (file)
@@ -513,17 +513,18 @@ Description:      information about CPUs heterogeneity.
                cpu_capacity: capacity of cpuX.
 
 What:          /sys/devices/system/cpu/vulnerabilities
+               /sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+               /sys/devices/system/cpu/vulnerabilities/l1tf
+               /sys/devices/system/cpu/vulnerabilities/mds
                /sys/devices/system/cpu/vulnerabilities/meltdown
+               /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+               /sys/devices/system/cpu/vulnerabilities/retbleed
+               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
                /sys/devices/system/cpu/vulnerabilities/spectre_v1
                /sys/devices/system/cpu/vulnerabilities/spectre_v2
-               /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
-               /sys/devices/system/cpu/vulnerabilities/l1tf
-               /sys/devices/system/cpu/vulnerabilities/mds
                /sys/devices/system/cpu/vulnerabilities/srbds
                /sys/devices/system/cpu/vulnerabilities/tsx_async_abort
-               /sys/devices/system/cpu/vulnerabilities/itlb_multihit
-               /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
-               /sys/devices/system/cpu/vulnerabilities/retbleed
 Date:          January 2018
 Contact:       Linux kernel mailing list <linux-kernel@vger.kernel.org>
 Description:   Information about CPU vulnerabilities
@@ -555,6 +556,7 @@ Description:        Control Symmetric Multi Threading (SMT)
                         ================ =========================================
                         "on"             SMT is enabled
                         "off"            SMT is disabled
+                        "<N>"            SMT is enabled with N threads per core.
                         "forceoff"       SMT is force disabled. Cannot be changed.
                         "notsupported"   SMT is not supported by the CPU
                         "notimplemented" SMT runtime toggling is not
index c308926..d46b1c8 100644 (file)
@@ -1,4 +1,5 @@
 What:          /sys/bus/platform/devices/GGL0001:*/BINF.2
+               /sys/bus/platform/devices/GOOG0016:*/BINF.2
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -10,6 +11,7 @@ Description:
                == ===============================
 
 What:          /sys/bus/platform/devices/GGL0001:*/BINF.3
+               /sys/bus/platform/devices/GOOG0016:*/BINF.3
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -23,6 +25,7 @@ Description:
                == =====================================
 
 What:          /sys/bus/platform/devices/GGL0001:*/CHSW
+               /sys/bus/platform/devices/GOOG0016:*/CHSW
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -38,6 +41,7 @@ Description:
                ==== ===========================================
 
 What:          /sys/bus/platform/devices/GGL0001:*/FMAP
+               /sys/bus/platform/devices/GOOG0016:*/FMAP
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -45,6 +49,7 @@ Description:
                processor firmware flashmap.
 
 What:          /sys/bus/platform/devices/GGL0001:*/FRID
+               /sys/bus/platform/devices/GOOG0016:*/FRID
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -52,6 +57,7 @@ Description:
                main processor firmware.
 
 What:          /sys/bus/platform/devices/GGL0001:*/FWID
+               /sys/bus/platform/devices/GOOG0016:*/FWID
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -59,6 +65,7 @@ Description:
                main processor firmware.
 
 What:          /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.0
+               /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.0
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -73,6 +80,7 @@ Description:
                =========== ==================================
 
 What:          /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.1
+               /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.1
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -84,6 +92,7 @@ Description:
                == =======================
 
 What:          /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.2
+               /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.2
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -91,18 +100,21 @@ Description:
                controller.
 
 What:          /sys/bus/platform/devices/GGL0001:*/GPIO.X/GPIO.3
+               /sys/bus/platform/devices/GOOG0016:*/GPIO.X/GPIO.3
 Date:          May 2022
 KernelVersion: 5.19
 Description:
                Returns name of the GPIO controller.
 
 What:          /sys/bus/platform/devices/GGL0001:*/HWID
+               /sys/bus/platform/devices/GOOG0016:*/HWID
 Date:          May 2022
 KernelVersion: 5.19
 Description:
                Returns hardware ID for the Chromebook.
 
 What:          /sys/bus/platform/devices/GGL0001:*/MECK
+               /sys/bus/platform/devices/GOOG0016:*/MECK
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -113,6 +125,7 @@ Description:
                present, or if the firmware was unable to read the extended registers, this buffer size can be zero.
 
 What:          /sys/bus/platform/devices/GGL0001:*/VBNV.0
+               /sys/bus/platform/devices/GOOG0016:*/VBNV.0
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -122,6 +135,7 @@ Description:
                clock data).
 
 What:          /sys/bus/platform/devices/GGL0001:*/VBNV.1
+               /sys/bus/platform/devices/GOOG0016:*/VBNV.1
 Date:          May 2022
 KernelVersion: 5.19
 Description:
@@ -129,9 +143,10 @@ Description:
                storage block.
 
 What:          /sys/bus/platform/devices/GGL0001:*/VDAT
+               /sys/bus/platform/devices/GOOG0016:*/VDAT
 Date:          May 2022
 KernelVersion: 5.19
 Description:
                Returns the verified boot data block shared between the
                firmware verification step and the kernel verification step
-               (binary).
+               (hex dump).
index d5f44fc..e487f96 100644 (file)
@@ -994,7 +994,7 @@ Description:        This file shows the amount of physical memory needed
 What:          /sys/bus/platform/drivers/ufshcd/*/rpm_lvl
 What:          /sys/bus/platform/devices/*.ufs/rpm_lvl
 Date:          September 2014
-Contact:       Subhash Jadavani <subhashj@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This entry could be used to set or show the UFS device
                runtime power management level. The current driver
                implementation supports 7 levels with next target states:
@@ -1021,7 +1021,7 @@ Description:      This entry could be used to set or show the UFS device
 What:          /sys/bus/platform/drivers/ufshcd/*/rpm_target_dev_state
 What:          /sys/bus/platform/devices/*.ufs/rpm_target_dev_state
 Date:          February 2018
-Contact:       Subhash Jadavani <subhashj@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This entry shows the target power mode of an UFS device
                for the chosen runtime power management level.
 
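The rpm_lvl/spm_lvl entries above are read-write, while the *_target_*
entries only reflect the states implied by the chosen level. A hedged
sketch of selecting a runtime PM level and reading back the resulting
targets (the 1d84000.ufs device name is an illustrative assumption)::

  /* Hedged sketch: select UFS runtime PM level 3, then show the
   * device and link states that level targets.
   */
  #include <stdio.h>

  #define UFS "/sys/bus/platform/devices/1d84000.ufs/"

  static void dump(const char *path)
  {
          char buf[128];
          FILE *f = fopen(path, "r");

          if (f && fgets(buf, sizeof(buf), f))
                  printf("%s: %s", path, buf);
          if (f)
                  fclose(f);
  }

  int main(void)
  {
          FILE *f = fopen(UFS "rpm_lvl", "w");

          if (!f || fputs("3\n", f) == EOF)
                  return 1;
          fclose(f);
          dump(UFS "rpm_target_dev_state");
          dump(UFS "rpm_target_link_state");
          return 0;
  }
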
@@ -1030,7 +1030,7 @@ Description:      This entry shows the target power mode of an UFS device
 What:          /sys/bus/platform/drivers/ufshcd/*/rpm_target_link_state
 What:          /sys/bus/platform/devices/*.ufs/rpm_target_link_state
 Date:          February 2018
-Contact:       Subhash Jadavani <subhashj@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This entry shows the target state of an UFS UIC link
                for the chosen runtime power management level.
 
@@ -1039,7 +1039,7 @@ Description:      This entry shows the target state of an UFS UIC link
 What:          /sys/bus/platform/drivers/ufshcd/*/spm_lvl
 What:          /sys/bus/platform/devices/*.ufs/spm_lvl
 Date:          September 2014
-Contact:       Subhash Jadavani <subhashj@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This entry could be used to set or show the UFS device
                system power management level. The current driver
                implementation supports 7 levels with next target states:
@@ -1066,7 +1066,7 @@ Description:      This entry could be used to set or show the UFS device
 What:          /sys/bus/platform/drivers/ufshcd/*/spm_target_dev_state
 What:          /sys/bus/platform/devices/*.ufs/spm_target_dev_state
 Date:          February 2018
-Contact:       Subhash Jadavani <subhashj@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This entry shows the target power mode of an UFS device
                for the chosen system power management level.
 
@@ -1075,7 +1075,7 @@ Description:      This entry shows the target power mode of an UFS device
 What:          /sys/bus/platform/drivers/ufshcd/*/spm_target_link_state
 What:          /sys/bus/platform/devices/*.ufs/spm_target_link_state
 Date:          February 2018
-Contact:       Subhash Jadavani <subhashj@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This entry shows the target state of an UFS UIC link
                for the chosen system power management level.
 
@@ -1084,7 +1084,7 @@ Description:      This entry shows the target state of an UFS UIC link
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/monitor_enable
 What:          /sys/bus/platform/devices/*.ufs/monitor/monitor_enable
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the status of performance monitor enablement
                and it can be used to start/stop the monitor. When the monitor
                is stopped, the performance data collected is also cleared.
@@ -1092,7 +1092,7 @@ Description:      This file shows the status of performance monitor enablement
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/monitor_chunk_size
 What:          /sys/bus/platform/devices/*.ufs/monitor/monitor_chunk_size
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file tells the monitor to focus on requests transferring
                data of specific chunk size (in Bytes). 0 means any chunk size.
                It can only be changed when monitor is disabled.
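
Since monitor_chunk_size may only be written while the monitor is
disabled, the natural order is: stop, set the chunk size, start. A
hedged sketch (the device path is again an illustrative assumption)::

  /* Hedged sketch: focus the UFS performance monitor on 4096-byte
   * requests, then start it.
   */
  #include <stdio.h>

  #define MON "/sys/bus/platform/devices/1d84000.ufs/monitor/"

  static int put(const char *path, const char *val)
  {
          FILE *f = fopen(path, "w");

          if (!f)
                  return -1;
          if (fputs(val, f) == EOF) {
                  fclose(f);
                  return -1;
          }
          return fclose(f) ? -1 : 0;
  }

  int main(void)
  {
          if (put(MON "monitor_enable", "0\n"))   /* stop, clears data */
                  return 1;
          if (put(MON "monitor_chunk_size", "4096\n"))
                  return 1;
          return put(MON "monitor_enable", "1\n") ? 1 : 0;
  }
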
@@ -1100,7 +1100,7 @@ Description:      This file tells the monitor to focus on requests transferring
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_total_sectors
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_total_sectors
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows how many sectors (in 512 Bytes) have been
                sent from device to host after monitor gets started.
 
@@ -1109,7 +1109,7 @@ Description:      This file shows how many sectors (in 512 Bytes) have been
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_total_busy
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_total_busy
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows how long (in micro seconds) has been spent
                sending data from device to host after monitor gets started.
 
@@ -1118,7 +1118,7 @@ Description:      This file shows how long (in micro seconds) has been spent
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_nr_requests
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_nr_requests
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows how many read requests have been sent after
                monitor gets started.
 
@@ -1127,7 +1127,7 @@ Description:      This file shows how many read requests have been sent after
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_max
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_max
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the maximum latency (in micro seconds) of
                read requests after monitor gets started.
 
@@ -1136,7 +1136,7 @@ Description:      This file shows the maximum latency (in micro seconds) of
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_min
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_min
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the minimum latency (in micro seconds) of
                read requests after monitor gets started.
 
@@ -1145,7 +1145,7 @@ Description:      This file shows the minimum latency (in micro seconds) of
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_avg
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_avg
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the average latency (in micro seconds) of
                read requests after monitor gets started.
 
@@ -1154,7 +1154,7 @@ Description:      This file shows the average latency (in micro seconds) of
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/read_req_latency_sum
 What:          /sys/bus/platform/devices/*.ufs/monitor/read_req_latency_sum
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the total latency (in micro seconds) of
                read requests sent after monitor gets started.
 
@@ -1163,7 +1163,7 @@ Description:      This file shows the total latency (in micro seconds) of
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_total_sectors
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_total_sectors
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows how many sectors (in 512 Bytes) have been sent
                from host to device after monitor gets started.
 
@@ -1172,7 +1172,7 @@ Description:      This file shows how many sectors (in 512 Bytes) have been sent
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_total_busy
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_total_busy
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows how long (in micro seconds) has been spent
                sending data from host to device after monitor gets started.
 
@@ -1181,7 +1181,7 @@ Description:      This file shows how long (in micro seconds) has been spent
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_nr_requests
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_nr_requests
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows how many write requests have been sent after
                monitor gets started.
 
@@ -1190,7 +1190,7 @@ Description:      This file shows how many write requests have been sent after
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_max
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_max
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the maximum latency (in micro seconds) of write
                requests after monitor gets started.
 
@@ -1199,7 +1199,7 @@ Description:      This file shows the maximum latency (in micro seconds) of write
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_min
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_min
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the minimum latency (in micro seconds) of write
                requests after monitor gets started.
 
@@ -1208,7 +1208,7 @@ Description:      This file shows the minimum latency (in micro seconds) of write
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_avg
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_avg
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the average latency (in micro seconds) of write
                requests after monitor gets started.
 
@@ -1217,7 +1217,7 @@ Description:      This file shows the average latency (in micro seconds) of write
 What:          /sys/bus/platform/drivers/ufshcd/*/monitor/write_req_latency_sum
 What:          /sys/bus/platform/devices/*.ufs/monitor/write_req_latency_sum
 Date:          January 2021
-Contact:       Can Guo <cang@codeaurora.org>
+Contact:       Can Guo <quic_cang@quicinc.com>
 Description:   This file shows the total latency (in micro seconds) of write
                requests after monitor gets started.
 
@@ -1226,7 +1226,7 @@ Description:      This file shows the total latency (in micro seconds) of write
 What:          /sys/bus/platform/drivers/ufshcd/*/device_descriptor/wb_presv_us_en
 What:          /sys/bus/platform/devices/*.ufs/device_descriptor/wb_presv_us_en
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows if preserve user-space was configured
 
                The file is read only.
@@ -1234,7 +1234,7 @@ Description:      This entry shows if preserve user-space was configured
 What:          /sys/bus/platform/drivers/ufshcd/*/device_descriptor/wb_shared_alloc_units
 What:          /sys/bus/platform/devices/*.ufs/device_descriptor/wb_shared_alloc_units
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the shared allocated units of WB buffer
 
                The file is read only.
@@ -1242,7 +1242,7 @@ Description:      This entry shows the shared allocated units of WB buffer
 What:          /sys/bus/platform/drivers/ufshcd/*/device_descriptor/wb_type
 What:          /sys/bus/platform/devices/*.ufs/device_descriptor/wb_type
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the configured WB type.
                0x1 for shared buffer mode. 0x0 for dedicated buffer mode.
 
@@ -1251,7 +1251,7 @@ Description:      This entry shows the configured WB type.
 What:          /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_buff_cap_adj
 What:          /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_buff_cap_adj
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the total user-space decrease in shared
                buffer mode.
                The value of this parameter is 3 for TLC NAND when SLC mode
@@ -1262,7 +1262,7 @@ Description:      This entry shows the total user-space decrease in shared
 What:          /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_max_alloc_units
 What:          /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_max_alloc_units
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the Maximum total WriteBooster Buffer size
                which is supported by the entire device.
 
@@ -1271,7 +1271,7 @@ Description:      This entry shows the Maximum total WriteBooster Buffer size
 What:          /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_max_wb_luns
 What:          /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_max_wb_luns
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the maximum number of luns that can support
                WriteBooster.
 
@@ -1280,7 +1280,7 @@ Description:      This entry shows the maximum number of luns that can support
 What:          /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_sup_red_type
 What:          /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_sup_red_type
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   The supportability of user space reduction mode
                and preserve user space mode.
                00h: WriteBooster Buffer can be configured only in
@@ -1295,7 +1295,7 @@ Description:      The supportability of user space reduction mode
 What:          /sys/bus/platform/drivers/ufshcd/*/geometry_descriptor/wb_sup_wb_type
 What:          /sys/bus/platform/devices/*.ufs/geometry_descriptor/wb_sup_wb_type
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   The supportability of WriteBooster Buffer type.
 
                ===  ==========================================================
@@ -1310,7 +1310,7 @@ Description:      The supportability of WriteBooster Buffer type.
 What:          /sys/bus/platform/drivers/ufshcd/*/flags/wb_enable
 What:          /sys/bus/platform/devices/*.ufs/flags/wb_enable
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the status of WriteBooster.
 
                == ============================
@@ -1323,7 +1323,7 @@ Description:      This entry shows the status of WriteBooster.
 What:          /sys/bus/platform/drivers/ufshcd/*/flags/wb_flush_en
 What:          /sys/bus/platform/devices/*.ufs/flags/wb_flush_en
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows if flush is enabled.
 
                == =================================
@@ -1336,7 +1336,7 @@ Description:      This entry shows if flush is enabled.
 What:          /sys/bus/platform/drivers/ufshcd/*/flags/wb_flush_during_h8
 What:          /sys/bus/platform/devices/*.ufs/flags/wb_flush_during_h8
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   Flush WriteBooster Buffer during hibernate state.
 
                == =================================================
@@ -1351,7 +1351,7 @@ Description:      Flush WriteBooster Buffer during hibernate state.
 What:          /sys/bus/platform/drivers/ufshcd/*/attributes/wb_avail_buf
 What:          /sys/bus/platform/devices/*.ufs/attributes/wb_avail_buf
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the amount of unused WriteBooster buffer
                available.
 
@@ -1360,7 +1360,7 @@ Description:      This entry shows the amount of unused WriteBooster buffer
 What:          /sys/bus/platform/drivers/ufshcd/*/attributes/wb_cur_buf
 What:          /sys/bus/platform/devices/*.ufs/attributes/wb_cur_buf
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the amount of unused current buffer.
 
                The file is read only.
@@ -1368,7 +1368,7 @@ Description:      This entry shows the amount of unused current buffer.
 What:          /sys/bus/platform/drivers/ufshcd/*/attributes/wb_flush_status
 What:          /sys/bus/platform/devices/*.ufs/attributes/wb_flush_status
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the flush operation status.
 
 
@@ -1385,7 +1385,7 @@ Description:      This entry shows the flush operation status.
 What:          /sys/bus/platform/drivers/ufshcd/*/attributes/wb_life_time_est
 What:          /sys/bus/platform/devices/*.ufs/attributes/wb_life_time_est
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows an indication of the WriteBooster Buffer
                lifetime based on the amount of performed program/erase cycles
 
@@ -1399,7 +1399,7 @@ Description:      This entry shows an indication of the WriteBooster Buffer
 
 What:          /sys/class/scsi_device/*/device/unit_descriptor/wb_buf_alloc_units
 Date:          June 2020
-Contact:       Asutosh Das <asutoshd@codeaurora.org>
+Contact:       Asutosh Das <quic_asutoshd@quicinc.com>
 Description:   This entry shows the configured size of WriteBooster buffer.
                0400h corresponds to 4GB.
 
index 0888636..62addab 100644 (file)
@@ -60,3 +60,14 @@ Description: Module taint flags:
                        C   staging driver module
                        E   unsigned module
                        ==  =====================
+
+What:          /sys/module/grant_table/parameters/free_per_iteration
+Date:          July 2023
+KernelVersion: 6.5 but backported to all supported stable branches
+Contact:       Xen developer discussion <xen-devel@lists.xenproject.org>
+Description:   Read and write the number of grant entries to attempt to free per iteration.
+
+               Note: Future versions of Xen and Linux may provide a better
+               interface for controlling the rate of deferred grant reclaim
+               or may not need it at all.
+Users:         Qubes OS (https://www.qubes-os.org)
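
The entry is an ordinary module parameter, so it can be inspected and
tuned at runtime through sysfs. A hedged sketch (the value 64 is an
arbitrary illustration, not a recommendation)::

  /* Read the current grant-reclaim batch size, then raise it. */
  #include <stdio.h>

  int main(void)
  {
          const char *p =
                  "/sys/module/grant_table/parameters/free_per_iteration";
          char cur[32];
          FILE *f = fopen(p, "r");

          if (!f || !fgets(cur, sizeof(cur), f))
                  return 1;       /* not a Xen guest, or parameter absent */
          fclose(f);
          printf("current: %s", cur);

          f = fopen(p, "w");
          if (!f || fputs("64\n", f) == EOF)
                  return 1;
          return fclose(f) ? 1 : 0;
  }
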
index fca40a5..a80aeda 100644 (file)
@@ -2,7 +2,7 @@ What:           /sys/devices/platform/hidma-*/chid
                /sys/devices/platform/QCOM8061:*/chid
 Date:          Dec 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the ID of the channel within the HIDMA instance.
                It is used to associate a given HIDMA channel with the
index 3b6c5c9..0373745 100644 (file)
@@ -2,7 +2,7 @@ What:           /sys/devices/platform/hidma-mgmt*/chanops/chan*/priority
                /sys/devices/platform/QCOM8060:*/chanops/chan*/priority
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains either 0 or 1 and indicates if the DMA channel is a
                low priority (0) or high priority (1) channel.
@@ -11,7 +11,7 @@ What:         /sys/devices/platform/hidma-mgmt*/chanops/chan*/weight
                /sys/devices/platform/QCOM8060:*/chanops/chan*/weight
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains 0..15 and indicates the weight of the channel among
                equal priority channels during round robin scheduling.
@@ -20,7 +20,7 @@ What:         /sys/devices/platform/hidma-mgmt*/chreset_timeout_cycles
                /sys/devices/platform/QCOM8060:*/chreset_timeout_cycles
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the platform specific cycle value to wait after a
                reset command is issued. If the value is chosen too short,
@@ -32,7 +32,7 @@ What:         /sys/devices/platform/hidma-mgmt*/dma_channels
                /sys/devices/platform/QCOM8060:*/dma_channels
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains the number of dma channels supported by one instance
                of HIDMA hardware. The value may change from chip to chip.
@@ -41,7 +41,7 @@ What:         /sys/devices/platform/hidma-mgmt*/hw_version_major
                /sys/devices/platform/QCOM8060:*/hw_version_major
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Version number major for the hardware.
 
@@ -49,7 +49,7 @@ What:         /sys/devices/platform/hidma-mgmt*/hw_version_minor
                /sys/devices/platform/QCOM8060:*/hw_version_minor
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Version number minor for the hardware.
 
@@ -57,7 +57,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_rd_xactions
                /sys/devices/platform/QCOM8060:*/max_rd_xactions
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains a value between 0 and 31. Maximum number of
                read transactions that can be issued back to back.
@@ -69,7 +69,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_read_request
                /sys/devices/platform/QCOM8060:*/max_read_request
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Size of each read request. The value needs to be a power
                of two and can be between 128 and 1024.
@@ -78,7 +78,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_wr_xactions
                /sys/devices/platform/QCOM8060:*/max_wr_xactions
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Contains a value between 0 and 31. Maximum number of
                write transactions that can be issued back to back.
@@ -91,7 +91,7 @@ What:         /sys/devices/platform/hidma-mgmt*/max_write_request
                /sys/devices/platform/QCOM8060:*/max_write_request
 Date:          Nov 2015
 KernelVersion: 4.4
-Contact:       "Sinan Kaya <okaya@codeaurora.org>"
+Contact:       "Sinan Kaya <okaya@kernel.org>"
 Description:
                Size of each write request. The value needs to be a power
                of two and can be between 128 and 1024.
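
Taken together, the chanops attributes above let an administrator shape
per-channel scheduling. A hedged sketch marking channel 0 high priority
with the maximum round-robin weight (the instance and channel names are
illustrative assumptions)::

  /* Hedged sketch: configure one HIDMA channel's scheduling knobs. */
  #include <stdio.h>

  static int put(const char *path, const char *val)
  {
          FILE *f = fopen(path, "w");

          if (!f)
                  return -1;
          fputs(val, f);
          return fclose(f);
  }

  int main(void)
  {
          const char *base =
                  "/sys/devices/platform/QCOM8060:00/chanops/chan0";
          char path[256];

          snprintf(path, sizeof(path), "%s/priority", base);
          if (put(path, "1\n"))           /* 1 == high priority */
                  return 1;
          snprintf(path, sizeof(path), "%s/weight", base);
          return put(path, "15\n") ? 1 : 0;  /* weight range is 0..15 */
  }
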
index 2a5c79d..bcbc4b3 100644 (file)
@@ -10,7 +10,7 @@ misuses of the RCU API, most notably using one of the rcu_dereference()
 family to access an RCU-protected pointer without the proper protection.
 When such misuse is detected, a lockdep-RCU splat is emitted.
 
-The usual cause of a lockdep-RCU slat is someone accessing an
+The usual cause of a lockdep-RCU splat is someone accessing an
 RCU-protected data structure without either (1) being in the right kind of
 RCU read-side critical section or (2) holding the right update-side lock.
 This problem can therefore be serious: it might result in random memory
index 9a734bf..21e40fc 100644 (file)
@@ -18,7 +18,16 @@ to solve following problem.
 
 Without 'nulls', a typical RCU linked list managing objects which are
 allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can use the following
-algorithms:
+algorithms.  The following examples assume 'obj' is a pointer to such
+an object, which has the type below.
+
+::
+
+  struct object {
+    struct hlist_node obj_node;
+    atomic_t refcnt;
+    unsigned int key;
+  };
 
 1) Lookup algorithm
 -------------------
@@ -26,11 +35,13 @@ algorithms:
 ::
 
   begin:
-  rcu_read_lock()
+  rcu_read_lock();
   obj = lockless_lookup(key);
   if (obj) {
-    if (!try_get_ref(obj)) // might fail for free objects
+    if (!try_get_ref(obj)) { // might fail for free objects
+      rcu_read_unlock();
       goto begin;
+    }
     /*
     * Because a writer could delete object, and a writer could
     * reuse these object before the RCU grace period, we
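
The try_get_ref() used in the lookup above is deliberately allowed to
fail: with SLAB_TYPESAFE_BY_RCU the object can be freed and reused while
we are looking at it. A hedged sketch of how such a helper is typically
built on the 'refcnt' field::

  static inline bool try_get_ref(struct object *obj)
  {
          /* Only take a reference while the count is non-zero; a zero
           * count means the object is (being) freed and must not be
           * resurrected.
           */
          return atomic_inc_not_zero(&obj->refcnt);
  }
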
@@ -54,7 +65,7 @@ but a version with an additional memory barrier (smp_rmb())
     struct hlist_node *node, *next;
     for (pos = rcu_dereference((head)->first);
          pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
-         ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+         ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
          pos = rcu_dereference(next))
       if (obj->key == key)
         return obj;
@@ -66,10 +77,10 @@ And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb()::
   struct hlist_node *node;
   for (pos = rcu_dereference((head)->first);
        pos && ({ prefetch(pos->next); 1; }) &&
-       ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+       ({ obj = hlist_entry(pos, typeof(*obj), obj_node); 1; });
        pos = rcu_dereference(pos->next))
-   if (obj->key == key)
-     return obj;
+    if (obj->key == key)
+      return obj;
   return NULL;
 
 Quoting Corey Minyard::
@@ -86,7 +97,7 @@ Quoting Corey Minyard::
 2) Insertion algorithm
 ----------------------
 
-We need to make sure a reader cannot read the new 'obj->obj_next' value
+We need to make sure a reader cannot read the new 'obj->obj_node.next' value
 and previous value of 'obj->key'. Otherwise, an item could be deleted
 from a chain, and inserted into another chain. If new chain was empty
 before the move, 'next' pointer is NULL, and lockless reader can not
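
A hedged sketch of an insert path satisfying the ordering requirement
above: the key is published before the object becomes reachable, so a
reader that finds the node also sees the new key. lock_chain() and
unlock_chain() stand in for whatever update-side lock protects the
chain::

  obj = kmem_cache_alloc(cachep, GFP_KERNEL);
  lock_chain();                         /* typically a spin_lock() */
  obj->key = key;
  atomic_set_release(&obj->refcnt, 1);  /* orders key before refcnt */
  hlist_add_head_rcu(&obj->obj_node, head);
  unlock_chain();                       /* typically a spin_unlock() */
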
@@ -129,8 +140,7 @@ very very fast (before the end of RCU grace period)
 Avoiding extra smp_rmb()
 ========================
 
-With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
-and extra _release() in insert function.
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup().
 
 For example, if we choose to store the slot number as the 'nulls'
 end-of-list marker for each slot of the hash table, we can detect
@@ -142,6 +152,9 @@ the beginning. If the object was moved to the same chain,
 then the reader doesn't care: It might occasionally
 scan the list again without harm.
 
+Note that using hlist_nulls means the type of the 'obj_node' field of
+'struct object' becomes 'struct hlist_nulls_node'.
+
 
 1) lookup algorithm
 -------------------
@@ -151,7 +164,7 @@ scan the list again without harm.
   head = &table[slot];
   begin:
   rcu_read_lock();
-  hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+  hlist_nulls_for_each_entry_rcu(obj, node, head, obj_node) {
     if (obj->key == key) {
       if (!try_get_ref(obj)) { // might fail for free objects
        rcu_read_unlock();
@@ -182,6 +195,9 @@ scan the list again without harm.
 2) Insert algorithm
 -------------------
 
+Same as the above, but uses hlist_nulls_add_head_rcu() instead of
+hlist_add_head_rcu().
+
 ::
 
   /*
index 06c525e..b1b57f6 100644 (file)
                 45 = /dev/ttyMM1               Marvell MPSC - port 1 (obsolete unused)
                 46 = /dev/ttyCPM0              PPC CPM (SCC or SMC) - port 0
                    ...
-                47 = /dev/ttyCPM5              PPC CPM (SCC or SMC) - port 5
+                49 = /dev/ttyCPM3              PPC CPM (SCC or SMC) - port 3
                 50 = /dev/ttyIOC0              Altix serial card
                    ...
                 81 = /dev/ttyIOC31             Altix serial card
diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
new file mode 100644 (file)
index 0000000..264bfa9
--- /dev/null
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+GDS - Gather Data Sampling
+==========================
+
+Gather Data Sampling is a hardware vulnerability which allows unprivileged
+speculative access to data which was previously stored in vector registers.
+
+Problem
+-------
+When a gather instruction performs loads from memory, different data elements
+are merged into the destination vector register. However, when a gather
+instruction that is transiently executed encounters a fault, stale data from
+architectural or internal vector registers may get transiently forwarded to the
+destination vector register instead. This will allow a malicious attacker to
+infer stale data using typical side channel techniques like cache timing
+attacks. GDS is a purely sampling-based attack.
+
+The attacker uses gather instructions to infer the stale vector register data.
+The victim does not need to do anything special other than use the vector
+registers. The victim does not need to use gather instructions to be
+vulnerable.
+
+Because the buffers are shared between Hyper-Threads, cross Hyper-Thread
+attacks are possible.
+
+Attack scenarios
+----------------
+Without mitigation, GDS can infer stale data across virtually all
+permission boundaries:
+
+       Non-enclaves can infer SGX enclave data
+       Userspace can infer kernel data
+       Guests can infer data from hosts
+       Guests can infer data from other guests
+       Users can infer data from other users
+
+Because of this, it is important to ensure that the mitigation stays enabled in
+lower-privilege contexts like guests and when running outside SGX enclaves.
+
+The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure
+that guests are not allowed to disable the GDS mitigation. If a host erred and
+allowed this, a guest could theoretically disable GDS mitigation, mount an
+attack, and re-enable it.
+
+Mitigation mechanism
+--------------------
+This issue is mitigated in microcode. The microcode defines the following new
+bits:
+
+ ================================   ===   ============================
+ IA32_ARCH_CAPABILITIES[GDS_CTRL]   R/O   Enumerates GDS vulnerability
+                                          and mitigation support.
+ IA32_ARCH_CAPABILITIES[GDS_NO]     R/O   Processor is not vulnerable.
+ IA32_MCU_OPT_CTRL[GDS_MITG_DIS]    R/W   Disables the mitigation.
+                                          0 by default.
+ IA32_MCU_OPT_CTRL[GDS_MITG_LOCK]   R/W   Locks GDS_MITG_DIS=0. Writes
+                                          to GDS_MITG_DIS are ignored.
+                                          Can't be cleared once set.
+ ================================   ===   ============================
+
+GDS can also be mitigated on systems that don't have updated microcode by
+disabling AVX. This can be done by setting gather_data_sampling="force" or
+"clearcpuid=avx" on the kernel command-line.
+
+If used, these options will disable AVX use by turning off XSAVE YMM support.
+However, the processor will still enumerate AVX support.  Userspace that
+does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM
+support will break.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The mitigation can be disabled by setting "gather_data_sampling=off" or
+"mitigations=off" on the kernel command line. Not specifying either will default
+to the mitigation being enabled. Specifying "gather_data_sampling=force" will
+use the microcode mitigation when available or disable AVX on affected systems
+where the microcode hasn't been updated to include the mitigation.
+
+GDS System Information
+------------------------
+The kernel provides vulnerability status information through sysfs. For
+GDS this can be accessed by the following sysfs file:
+
+/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+
+The possible values contained in this file are:
+
+ ============================== =============================================
+ Not affected                   Processor not vulnerable.
+ Vulnerable                     Processor vulnerable and mitigation disabled.
+ Vulnerable: No microcode       Processor vulnerable and microcode is missing
+                                mitigation.
+ Mitigation: AVX disabled,
+ no microcode                   Processor is vulnerable and microcode is missing
+                                mitigation. AVX disabled as mitigation.
+ Mitigation: Microcode          Processor is vulnerable and mitigation is in
+                                effect.
+ Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in
+                                effect and cannot be disabled.
+ Unknown: Dependent on
+ hypervisor status              Running on a virtual guest processor that is
+                                affected but with no way to know if host
+                                processor is mitigated or vulnerable.
+ ============================== =============================================
+
+GDS Default mitigation
+----------------------
+The updated microcode will enable the mitigation by default. The kernel's
+default action is to leave the mitigation enabled.
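
A hedged sketch that classifies the machine from this file, following
the table above::

  /* Hedged sketch: classify this system's GDS state from the sysfs
   * file documented above.
   */
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          char s[128];
          FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities"
                          "/gather_data_sampling", "r");

          if (!f)
                  return 1;       /* file absent: kernel predates GDS */
          if (!fgets(s, sizeof(s), f))
                  s[0] = '\0';
          fclose(f);

          if (!strncmp(s, "Mitigation:", 11))
                  printf("mitigated: %s", s);
          else if (!strncmp(s, "Vulnerable", 10))
                  printf("NOT mitigated: %s", s);
          else
                  printf("status: %s", s);   /* Not affected / Unknown */
          return 0;
  }
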
index e061476..de99caa 100644 (file)
@@ -13,9 +13,11 @@ are configurable at compile, boot or run time.
    l1tf
    mds
    tsx_async_abort
-   multihit.rst
-   special-register-buffer-data-sampling.rst
-   core-scheduling.rst
-   l1d_flush.rst
-   processor_mmio_stale_data.rst
-   cross-thread-rsb.rst
+   multihit
+   special-register-buffer-data-sampling
+   core-scheduling
+   l1d_flush
+   processor_mmio_stale_data
+   cross-thread-rsb
+   srso
+   gather_data_sampling
index 4d186f5..32a8893 100644 (file)
@@ -484,11 +484,14 @@ Spectre variant 2
 
    Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
    boot, by setting the IBRS bit, and they're automatically protected against
-   Spectre v2 variant attacks, including cross-thread branch target injections
-   on SMT systems (STIBP). In other words, eIBRS enables STIBP too.
+   Spectre v2 variant attacks.
 
-   Legacy IBRS systems clear the IBRS bit on exit to userspace and
-   therefore explicitly enable STIBP for that
+   On Intel's enhanced IBRS systems, this includes cross-thread branch target
+   injections on SMT systems (STIBP). In other words, Intel eIBRS enables
+   STIBP, too.
+
+   AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear
+   the IBRS bit on exit to userspace, therefore both explicitly enable STIBP.
 
    The retpoline mitigation is turned on by default on vulnerable
    CPUs. It can be forced on or off by the administrator
diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
new file mode 100644 (file)
index 0000000..b6cfb51
--- /dev/null
@@ -0,0 +1,150 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Speculative Return Stack Overflow (SRSO)
+========================================
+
+This is a mitigation for the speculative return stack overflow (SRSO)
+vulnerability found on AMD processors. The mechanism is the by now
+well-known scenario of poisoning CPU functional units - the Branch Target
+Buffer (BTB) and Return Address Predictor (RAP) in this case - and then
+tricking the elevated privilege domain (the kernel) into leaking
+sensitive data.
+
+AMD CPUs predict RET instructions using a Return Address Predictor (aka
+Return Address Stack/Return Stack Buffer). In some cases, a non-architectural
+CALL instruction (i.e., an instruction predicted to be a CALL but is
+not actually a CALL) can create an entry in the RAP which may be used
+to predict the target of a subsequent RET instruction.
+
+The specific circumstances that lead to this vary by microarchitecture,
+but the concern is that an attacker can mis-train the CPU BTB to predict
+non-architectural CALL instructions in kernel space and use this to
+control the speculative target of a subsequent kernel RET, potentially
+leading to information disclosure via a speculative side-channel.
+
+The issue is tracked under CVE-2023-20569.
+
+Affected processors
+-------------------
+
+AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older
+processors have not been investigated.
+
+System information and options
+------------------------------
+
+First of all, it is required that the latest microcode be loaded for
+mitigations to be effective.
+
+The sysfs file showing SRSO mitigation status is:
+
+  /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+   The processor is not vulnerable
+
+ * 'Vulnerable: no microcode':
+
+   The processor is vulnerable; no microcode extending IBPB
+   functionality to address the vulnerability has been applied.
+
+ * 'Mitigation: microcode':
+
+   Extended IBPB functionality microcode patch has been applied. It does
+   not protect User->Kernel and Guest->Host transitions, but it does
+   address User->User and VM->VM attack vectors.
+
+   Note that User->User mitigation is controlled by how the IBPB aspect in
+   the Spectre v2 mitigation is selected:
+
+    * conditional IBPB:
+
+      where each process can select whether it needs an IBPB issued
+      around it via PR_SPEC_DISABLE/_ENABLE etc.; see :doc:`spectre`
+
+    * strict:
+
+      i.e., always on - by supplying spectre_v2_user=on on the kernel
+      command line
+
+   (spec_rstack_overflow=microcode)
+
+ * 'Mitigation: safe RET':
+
+   Software-only mitigation. It complements the extended IBPB microcode
+   patch functionality by also protecting the User->Kernel and Guest->Host
+   transitions.
+
+   Selected by default or by spec_rstack_overflow=safe-ret
+
+ * 'Mitigation: IBPB':
+
+   Similar protection as "safe RET" above but employs an IBPB barrier on
+   privilege domain crossings (User->Kernel, Guest->Host).
+
+   (spec_rstack_overflow=ibpb)
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+   Mitigation addressing the cloud provider scenario - the Guest->Host
+   transitions only.
+
+   (spec_rstack_overflow=ibpb-vmexit)
+
+
+
+In order to exploit the vulnerability, an attacker needs to:
+
+ - gain local access on the machine
+
+ - break kASLR
+
+ - find gadgets in the running kernel in order to use them in the exploit
+
+ - potentially create and pin an additional workload on the sibling
+   thread, depending on the microarchitecture (not necessary on fam 0x19)
+
+ - run the exploit
+
+Considering the performance implications of each mitigation type, the
+default one is 'Mitigation: safe RET' which should take care of most
+attack vectors, including the local User->Kernel one.
+
+As always, the user is advised to keep her/his system up-to-date by
+applying software updates regularly.
+
+The default setting will be reevaluated when needed and especially when
+new attack vectors appear.
+
+As one can surmise, 'Mitigation: safe RET' does come at the cost of some
+performance depending on the workload. If one trusts her/his userspace
+and does not want to suffer the performance impact, one can always
+disable the mitigation with spec_rstack_overflow=off.
+
+Similarly, 'Mitigation: IBPB' is another full mitigation type employing
+an indirect branch prediction barrier after having applied the required
+microcode patch for one's system. This mitigation comes also at
+a performance cost.
+
+Mitigation: safe RET
+--------------------
+
+The mitigation works by ensuring all RET instructions speculate to
+a controlled location, similar to how speculation is controlled in the
+retpoline sequence.  To accomplish this, the __x86_return_thunk forces
+the CPU to mispredict every function return using a 'safe return'
+sequence.
+
+To ensure the safety of this mitigation, the kernel must ensure that the
+safe return sequence is itself free from attacker interference.  In Zen3
+and Zen4, this is accomplished by creating a BTB alias between the
+untraining function srso_alias_untrain_ret() and the safe return
+function srso_alias_safe_ret(), which results in evicting a potentially
+poisoned BTB entry and using that safe one for all function returns.
+
+In older Zen1 and Zen2, this is accomplished using a reinterpretation
+technique similar to the Retbleed one: srso_untrain_ret() and
+srso_safe_ret().
index c18d94f..f8ebb63 100644 (file)
@@ -624,3 +624,9 @@ Used to get the correct ranges:
   * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space.
   * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array.
   * KERNEL_LINK_ADDR : start address of Kernel link and BPF
+
+va_kernel_pa_offset
+-------------------
+
+Indicates the offset between the kernel virtual and physical mappings.
+Used to translate virtual to physical addresses.
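+
+As an illustrative sketch of how a dump-analysis tool might apply this
+field (the helper name is hypothetical)::
+
+    /* Translate a kernel-mapping virtual address to a physical address. */
+    unsigned long kernel_virt_to_phys(unsigned long va)
+    {
+            return va - va_kernel_pa_offset;
+    }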
index a145799..0c38a8a 100644 (file)
                        others).
 
        ccw_timeout_log [S390]
-                       See Documentation/s390/common_io.rst for details.
+                       See Documentation/arch/s390/common_io.rst for details.
 
        cgroup_disable= [KNL] Disable a particular controller or optional feature
                        Format: {name of the controller(s) or feature(s) to disable}
                        Setting checkreqprot to 1 is deprecated.
 
        cio_ignore=     [S390]
-                       See Documentation/s390/common_io.rst for details.
+                       See Documentation/arch/s390/common_io.rst for details.
 
        clearcpuid=X[,X...] [X86]
                        Disable CPUID feature X for the kernel. See
                        Format: off | on
                        default: on
 
+       gather_data_sampling=
+                       [X86,INTEL] Control the Gather Data Sampling (GDS)
+                       mitigation.
+
+                       Gather Data Sampling is a hardware vulnerability which
+                       allows unprivileged speculative access to data which was
+                       previously stored in vector registers.
+
+                       This issue is mitigated by default in updated microcode.
+                       The mitigation may have a performance impact but can be
+                       disabled. On systems without the microcode mitigation,
+                       disabling AVX serves as a mitigation.
+
+                       force:  Disable AVX as a mitigation on systems without
+                               the microcode mitigation. No effect if the
+                               microcode mitigation is present. Known to cause
+                               crashes in userspace with buggy AVX enumeration.
+
+                       off:    Disable GDS mitigation.
+
        gcov_persist=   [GCOV] When non-zero (default), profiling data for
                        kernel modules is saved and remains accessible via
                        debugfs, even when the module is unloaded/reloaded.
        locktorture.torture_type= [KNL]
                        Specify the locking implementation to test.
 
+       locktorture.writer_fifo= [KNL]
+                       Run the write-side locktorture kthreads at
+                       sched_set_fifo() real-time priority.
+
        locktorture.verbose= [KNL]
                        Enable additional printk() statements.
 
                                Disable all optional CPU mitigations.  This
                                improves system performance, but it may also
                                expose users to several CPU vulnerabilities.
-                               Equivalent to: nopti [X86,PPC]
-                                              if nokaslr then kpti=0 [ARM64]
-                                              nospectre_v1 [X86,PPC]
-                                              nobp=0 [S390]
-                                              nospectre_v2 [X86,PPC,S390,ARM64]
-                                              spectre_v2_user=off [X86]
-                                              spec_store_bypass_disable=off [X86,PPC]
-                                              ssbd=force-off [ARM64]
-                                              nospectre_bhb [ARM64]
+                               Equivalent to: if nokaslr then kpti=0 [ARM64]
+                                              gather_data_sampling=off [X86]
+                                              kvm.nx_huge_pages=off [X86]
                                               l1tf=off [X86]
                                               mds=off [X86]
-                                              tsx_async_abort=off [X86]
-                                              kvm.nx_huge_pages=off [X86]
-                                              srbds=off [X86,INTEL]
+                                              mmio_stale_data=off [X86]
                                               no_entry_flush [PPC]
                                               no_uaccess_flush [PPC]
-                                              mmio_stale_data=off [X86]
+                                              nobp=0 [S390]
+                                              nopti [X86,PPC]
+                                              nospectre_bhb [ARM64]
+                                              nospectre_v1 [X86,PPC]
+                                              nospectre_v2 [X86,PPC,S390,ARM64]
                                               retbleed=off [X86]
+                                              spec_store_bypass_disable=off [X86,PPC]
+                                              spectre_v2_user=off [X86]
+                                              srbds=off [X86,INTEL]
+                                              ssbd=force-off [ARM64]
+                                              tsx_async_abort=off [X86]
 
                                Exceptions:
                                               This does not have any effect on
                        test until boot completes in order to avoid
                        interference.
 
+       rcuscale.kfree_by_call_rcu= [KNL]
+                       In kernels built with CONFIG_RCU_LAZY=y, test
+                       call_rcu() instead of kfree_rcu().
+
+       rcuscale.kfree_mult= [KNL]
+                       Instead of allocating an object of size kfree_obj,
+                       allocate one of kfree_mult * sizeof(kfree_obj).
+                       Defaults to 1.
+
        rcuscale.kfree_rcu_test= [KNL]
                        Set to measure performance of kfree_rcu() flooding.
 
                        Number of loops doing rcuscale.kfree_alloc_num number
                        of allocations and frees.
 
+       rcuscale.minruntime= [KNL]
+                       Set the minimum test run time in seconds.  This
+                       does not affect the data-collection interval,
+                       but instead allows better measurement of things
+                       like CPU consumption.
+
        rcuscale.nreaders= [KNL]
                        Set number of RCU readers.  The value -1 selects
                        N, where N is the number of CPUs.  A value
                        the same as for rcuscale.nreaders.
                        N, where N is the number of CPUs
 
-       rcuscale.perf_type= [KNL]
+       rcuscale.scale_type= [KNL]
                        Specify the RCU implementation to test.
 
        rcuscale.shutdown= [KNL]
                        in microseconds.  The default of zero says
                        no holdoff.
 
+       rcuscale.writer_holdoff_jiffies= [KNL]
+                       Additional write-side holdoff between grace
+                       periods, but in jiffies.  The default of zero
+                       says no holdoff.
+
        rcutorture.fqs_duration= [KNL]
                        Set duration of force_quiescent_state bursts
                        in microseconds.
                        number avoids disturbing real-time workloads,
                        but lengthens grace periods.
 
+       rcupdate.rcu_task_lazy_lim= [KNL]
+                       Number of callbacks on a given CPU that will
+                       cancel laziness on that CPU.  Use -1 to disable
+                       cancellation of laziness, but be advised that
+                       doing so increases the danger of OOM due to
+                       callback flooding.
+
        rcupdate.rcu_task_stall_info= [KNL]
                        Set initial timeout in jiffies for RCU task stall
                        informational messages, which give some indication
                        A change in value does not take effect until
                        the beginning of the next grace period.
 
+       rcupdate.rcu_tasks_lazy_ms= [KNL]
+                       Set the timeout in milliseconds for RCU Tasks
+                       asynchronous callback batching by call_rcu_tasks().
+                       A negative value will take the default.  A value
+                       of zero will disable batching.  Batching is
+                       always disabled for synchronize_rcu_tasks().
+
+       rcupdate.rcu_tasks_rude_lazy_ms= [KNL]
+                       Set the timeout in milliseconds for RCU Tasks
+                       Rude asynchronous callback batching by
+                       call_rcu_tasks_rude().  A negative value
+                       will take the default.  A value of zero will
+                       disable batching.  Batching is always disabled
+                       for synchronize_rcu_tasks_rude().
+
+       rcupdate.rcu_tasks_trace_lazy_ms= [KNL]
+                       Set the timeout in milliseconds for RCU Tasks
+                       Trace asynchronous callback batching by
+                       call_rcu_tasks_trace().  A negative value
+                       will take the default.  A value of zero will
+                       disable batching.  Batching is always disabled
+                       for synchronize_rcu_tasks_trace().
+
        rcupdate.rcu_self_test= [KNL]
                        Run the RCU early boot self tests
 
                        Useful for devices that are detected asynchronously
                        (e.g. USB and MMC devices).
 
+       rootwait=       [KNL] Maximum time (in seconds) to wait for the root
+                       device to show up before attempting to mount the root
+                       filesystem.
+
        rproc_mem=nn[KMG][@address]
                        [KNL,ARM,CMA] Remoteproc physical memory block.
                        Memory area to be used by remote processor image,
                        Not specifying this option is equivalent to
                        spectre_v2_user=auto.
 
+       spec_rstack_overflow=
+                       [X86] Control the RAS (Return Address Stack) overflow
+                       mitigation on AMD Zen CPUs
+
+                       off             - Disable mitigation
+                       microcode       - Enable microcode mitigation only
+                       safe-ret        - Enable sw-only safe RET mitigation (default)
+                       ibpb            - Enable mitigation by issuing IBPB on
+                                         kernel entry
+                       ibpb-vmexit     - Issue IBPB only on VMEXIT
+                                         (cloud-specific mitigation)
+
        spec_store_bypass_disable=
                        [HW] Control Speculative Store Bypass (SSB) Disable mitigation
                        (Speculative Store Bypass vulnerability)
                        -1: disable all critical trip points in all thermal zones
                        <degrees C>: override all critical trip points
 
-       thermal.nocrt=  [HW,ACPI]
-                       Set to disable actions on ACPI thermal zone
-                       critical and hot trip points.
-
        thermal.off=    [HW,ACPI]
                        1: disable ACPI thermal control
 
index 496cdca..e96f057 100644 (file)
@@ -63,6 +63,14 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A510     | #1902691        | ARM64_ERRATUM_1902691       |
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
++----------------+-----------------+-----------------+-----------------------------+
+| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
++----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A53      | #826319         | ARM64_ERRATUM_826319        |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A53      | #827319         | ARM64_ERRATUM_827319        |
@@ -109,14 +117,6 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A77      | #1508412        | ARM64_ERRATUM_1508412       |
 +----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2051678        | ARM64_ERRATUM_2051678       |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2077057        | ARM64_ERRATUM_2077057       |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
-+----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A510     | #2658417        | ARM64_ERRATUM_2658417       |
-+----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2054223        | ARM64_ERRATUM_2054223       |
@@ -148,6 +148,9 @@ stable kernels.
 | ARM            | MMU-700         | #2268618,2812531| N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
+| ARM            | GIC-700         | #2941627        | ARM64_ERRATUM_2941627       |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
 | Broadcom       | Brahma-B53      | N/A             | ARM64_ERRATUM_845719        |
 +----------------+-----------------+-----------------+-----------------------------+
 | Broadcom       | Brahma-B53      | N/A             | ARM64_ERRATUM_843419        |
@@ -195,6 +198,9 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | Hisilicon      | Hip08 SMMU PMCG | #162001800      | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
+| Hisilicon      | Hip08 SMMU PMCG | #162001900      | N/A                         |
+|                | Hip09 SMMU PMCG |                 |                             |
++----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
 | Qualcomm Tech. | Kryo/Falkor v1  | E1003           | QCOM_FALKOR_ERRATUM_1003    |
 +----------------+-----------------+-----------------+-----------------------------+
index ba529a1..3d0e53e 100644 (file)
@@ -322,7 +322,7 @@ The regset data starts with struct user_za_header, containing:
   VL is supported.
 
 * The size and layout of the payload depends on the header fields.  The
-  SME_PT_ZA_*() macros are provided to facilitate access to the data.
+  ZA_PT_ZA*() macros are provided to facilitate access to the data.
 
 * In either case, for SETREGSET it is permissible to omit the payload, in which
   case the vector length and flags are changed and PSTATE.ZA is set to 0
index 8458b88..c9a2098 100644 (file)
@@ -21,7 +21,7 @@ implementation.
    parisc/index
    ../powerpc/index
    ../riscv/index
-   ../s390/index
+   s390/index
    sh/index
    sparc/index
    x86/index
similarity index 99%
rename from Documentation/s390/3270.rst
rename to Documentation/arch/s390/3270.rst
index e09e779..467eace 100644 (file)
@@ -116,7 +116,7 @@ Here are the installation steps in detail:
        as a 3270, not a 3215.
 
        5. Run the 3270 configuration script config3270.  It is
-       distributed in this same directory, Documentation/s390, as
+       distributed in this same directory, Documentation/arch/s390, as
        config3270.sh.  Inspect the output script it produces,
        /tmp/mkdev3270, and then run that script.  This will create the
        necessary character special device files and make the necessary
@@ -125,7 +125,7 @@ Here are the installation steps in detail:
        Then notify /sbin/init that /etc/inittab has changed, by issuing
        the telinit command with the q operand::
 
-               cd Documentation/s390
+               cd Documentation/arch/s390
                sh config3270.sh
                sh /tmp/mkdev3270
                telinit q
similarity index 99%
rename from Documentation/s390/cds.rst
rename to Documentation/arch/s390/cds.rst
index 7006d82..bcad2a1 100644 (file)
@@ -39,7 +39,7 @@ some of them are ESA/390 platform specific.
 
 Note:
   In order to write a driver for S/390, you also need to look into the interface
-  described in Documentation/s390/driver-model.rst.
+  described in Documentation/arch/s390/driver-model.rst.
 
 Note for porting drivers from 2.4:
 
similarity index 98%
rename from Documentation/s390/common_io.rst
rename to Documentation/arch/s390/common_io.rst
index 8464856..6dcb40c 100644 (file)
@@ -136,5 +136,5 @@ debugfs entries
 
   The level of logging can be changed to be more or less verbose by piping to
   /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
-  documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
+  documentation on the S/390 debug feature (Documentation/arch/s390/s390dbf.rst)
   for details.
similarity index 99%
rename from Documentation/s390/pci.rst
rename to Documentation/arch/s390/pci.rst
index a1a72a4..d575548 100644 (file)
@@ -40,7 +40,7 @@ For example:
   Change the level of logging to be more or less verbose by piping
   a number between 0 and 6 to  /sys/kernel/debug/s390dbf/pci_*/level. For
   details, see the documentation on the S/390 debug feature at
-  Documentation/s390/s390dbf.rst.
+  Documentation/arch/s390/s390dbf.rst.
 
 Sysfs entries
 =============
similarity index 99%
rename from Documentation/s390/vfio-ccw.rst
rename to Documentation/arch/s390/vfio-ccw.rst
index 37026fa..42960b7 100644 (file)
@@ -440,6 +440,6 @@ Reference
 1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
 2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
 3. https://en.wikipedia.org/wiki/Channel_I/O
-4. Documentation/s390/cds.rst
+4. Documentation/arch/s390/cds.rst
 5. Documentation/driver-api/vfio.rst
 6. Documentation/driver-api/vfio-mediated-device.rst
index 33520ec..cdbca15 100644 (file)
@@ -1417,7 +1417,7 @@ execution context provided by the EFI firmware.
 
 The function prototype for the handover entry point looks like this::
 
-    efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
+    efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
 
 'handle' is the EFI image handle passed to the boot loader by the EFI
 firmware, 'table' is the EFI system table - these are the first two
index e6f5bc3..b9ae591 100644 (file)
@@ -395,8 +395,8 @@ multi-instance state the following function is available:
 * cpuhp_setup_state_multi(state, name, startup, teardown)
 
 The @state argument is either a statically allocated state or one of the
-constants for dynamically allocated states - CPUHP_PREPARE_DYN,
-CPUHP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
+constants for dynamically allocated states - CPUHP_BP_PREPARE_DYN,
+CPUHP_AP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
 which a dynamic state should be allocated.
 
 The @name argument is used for sysfs output and for instrumentation. The
@@ -588,7 +588,7 @@ notifications on online and offline operations::
 Setup and teardown a dynamically allocated state in the ONLINE section
 for notifications on offline operations::
 
-   state = cpuhp_setup_state(CPUHP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
+   state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
    if (state < 0)
        return state;
    ....
@@ -597,7 +597,7 @@ for notifications on offline operations::
 Setup and teardown a dynamically allocated state in the ONLINE section
 for notifications on online operations without invoking the callbacks::
 
-   state = cpuhp_setup_state_nocalls(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
+   state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
    if (state < 0)
        return state;
    ....
@@ -606,7 +606,7 @@ for notifications on online operations without invoking the callbacks::
 Setup, use and teardown a dynamically allocated multi-instance state in the
 ONLINE section for notifications on online and offline operation::
 
-   state = cpuhp_setup_state_multi(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
+   state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
    if (state < 0)
        return state;
    ....
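+
+Combining the two calls above, a multi-instance user might look like
+this (an illustrative sketch; the subsystem names, callbacks and the
+embedded ``node`` member are hypothetical)::
+
+   state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "subsys:online",
+                                   subsys_cpu_online, subsys_cpu_offline);
+   if (state < 0)
+       return state;
+
+   /* Each instance embeds a struct hlist_node used by the hotplug core. */
+   ret = cpuhp_state_add_instance(state, &inst->node);
+   if (ret) {
+       cpuhp_remove_multi_state(state);
+       return ret;
+   }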
index e14358b..99b5e95 100644 (file)
@@ -49,9 +49,14 @@ properties:
           - arm,cortex-a77-pmu
           - arm,cortex-a78-pmu
           - arm,cortex-a510-pmu
+          - arm,cortex-a520-pmu
           - arm,cortex-a710-pmu
+          - arm,cortex-a715-pmu
+          - arm,cortex-a720-pmu
           - arm,cortex-x1-pmu
           - arm,cortex-x2-pmu
+          - arm,cortex-x3-pmu
+          - arm,cortex-x4-pmu
           - arm,neoverse-e1-pmu
           - arm,neoverse-n1-pmu
           - arm,neoverse-n2-pmu
index a6b3bb8..c1d225f 100644 (file)
@@ -49,6 +49,7 @@ properties:
       - description: Frequency domain 0 register region
       - description: Frequency domain 1 register region
       - description: Frequency domain 2 register region
+      - description: Frequency domain 3 register region
 
   reg-names:
     minItems: 1
@@ -56,6 +57,7 @@ properties:
       - const: freq-domain0
       - const: freq-domain1
       - const: freq-domain2
+      - const: freq-domain3
 
   clocks:
     items:
@@ -69,7 +71,7 @@ properties:
 
   interrupts:
     minItems: 1
-    maxItems: 3
+    maxItems: 4
 
   interrupt-names:
     minItems: 1
@@ -77,6 +79,7 @@ properties:
       - const: dcvsh-irq-0
       - const: dcvsh-irq-1
       - const: dcvsh-irq-2
+      - const: dcvsh-irq-3
 
   '#freq-domain-cells':
     const: 1
diff --git a/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt b/Documentation/devicetree/bindings/cpufreq/ti-cpufreq.txt
deleted file mode 100644 (file)
index 1758051..0000000
+++ /dev/null
@@ -1,132 +0,0 @@
-TI CPUFreq and OPP bindings
-================================
-
-Certain TI SoCs, like those in the am335x, am437x, am57xx, and dra7xx
-families support different OPPs depending on the silicon variant in use.
-The ti-cpufreq driver can use revision and an efuse value from the SoC to
-provide the OPP framework with supported hardware information. This is
-used to determine which OPPs from the operating-points-v2 table get enabled
-when it is parsed by the OPP framework.
-
-Required properties:
---------------------
-In 'cpus' nodes:
-- operating-points-v2: Phandle to the operating-points-v2 table to use.
-
-In 'operating-points-v2' table:
-- compatible: Should be
-       - 'operating-points-v2-ti-cpu' for am335x, am43xx, and dra7xx/am57xx,
-         omap34xx, omap36xx and am3517 SoCs
-- syscon: A phandle pointing to a syscon node representing the control module
-         register space of the SoC.
-
-Optional properties:
---------------------
-- "vdd-supply", "vbb-supply": to define two regulators for dra7xx
-- "cpu0-supply", "vbb-supply": to define two regulators for omap36xx
-
-For each opp entry in 'operating-points-v2' table:
-- opp-supported-hw: Two bitfields indicating:
-       1. Which revision of the SoC the OPP is supported by
-       2. Which eFuse bits indicate this OPP is available
-
-       A bitwise AND is performed against these values and if any bit
-       matches, the OPP gets enabled.
-
-Example:
---------
-
-/* From arch/arm/boot/dts/am33xx.dtsi */
-cpus {
-       #address-cells = <1>;
-       #size-cells = <0>;
-       cpu@0 {
-               compatible = "arm,cortex-a8";
-               device_type = "cpu";
-               reg = <0>;
-
-               operating-points-v2 = <&cpu0_opp_table>;
-
-               clocks = <&dpll_mpu_ck>;
-               clock-names = "cpu";
-
-               clock-latency = <300000>; /* From omap-cpufreq driver */
-       };
-};
-
-/*
- * cpu0 has different OPPs depending on SoC revision and some on revisions
- * 0x2 and 0x4 have eFuse bits that indicate if they are available or not
- */
-cpu0_opp_table: opp-table {
-       compatible = "operating-points-v2-ti-cpu";
-       syscon = <&scm_conf>;
-
-       /*
-        * The three following nodes are marked with opp-suspend
-        * because they can not be enabled simultaneously on a
-        * single SoC.
-        */
-       opp50-300000000 {
-               opp-hz = /bits/ 64 <300000000>;
-               opp-microvolt = <950000 931000 969000>;
-               opp-supported-hw = <0x06 0x0010>;
-               opp-suspend;
-       };
-
-       opp100-275000000 {
-               opp-hz = /bits/ 64 <275000000>;
-               opp-microvolt = <1100000 1078000 1122000>;
-               opp-supported-hw = <0x01 0x00FF>;
-               opp-suspend;
-       };
-
-       opp100-300000000 {
-               opp-hz = /bits/ 64 <300000000>;
-               opp-microvolt = <1100000 1078000 1122000>;
-               opp-supported-hw = <0x06 0x0020>;
-               opp-suspend;
-       };
-
-       opp100-500000000 {
-               opp-hz = /bits/ 64 <500000000>;
-               opp-microvolt = <1100000 1078000 1122000>;
-               opp-supported-hw = <0x01 0xFFFF>;
-       };
-
-       opp100-600000000 {
-               opp-hz = /bits/ 64 <600000000>;
-               opp-microvolt = <1100000 1078000 1122000>;
-               opp-supported-hw = <0x06 0x0040>;
-       };
-
-       opp120-600000000 {
-               opp-hz = /bits/ 64 <600000000>;
-               opp-microvolt = <1200000 1176000 1224000>;
-               opp-supported-hw = <0x01 0xFFFF>;
-       };
-
-       opp120-720000000 {
-               opp-hz = /bits/ 64 <720000000>;
-               opp-microvolt = <1200000 1176000 1224000>;
-               opp-supported-hw = <0x06 0x0080>;
-       };
-
-       oppturbo-720000000 {
-               opp-hz = /bits/ 64 <720000000>;
-               opp-microvolt = <1260000 1234800 1285200>;
-               opp-supported-hw = <0x01 0xFFFF>;
-       };
-
-       oppturbo-800000000 {
-               opp-hz = /bits/ 64 <800000000>;
-               opp-microvolt = <1260000 1234800 1285200>;
-               opp-supported-hw = <0x06 0x0100>;
-       };
-
-       oppnitro-1000000000 {
-               opp-hz = /bits/ 64 <1000000000>;
-               opp-microvolt = <1325000 1298500 1351500>;
-               opp-supported-hw = <0x04 0x0200>;
-       };
-};
index ae4f68d..bd67cfe 100644 (file)
@@ -105,7 +105,7 @@ properties:
       G coefficient for temperature equation.
       Default for series 5 = 60000
       Default for series 6 = 57400
-    multipleOf: 1000
+    multipleOf: 100
     minimum: 1000
     $ref: /schemas/types.yaml#/definitions/uint32
 
@@ -114,7 +114,7 @@ properties:
       H coefficient for temperature equation.
       Default for series 5 = 200000
       Default for series 6 = 249400
-    multipleOf: 1000
+    multipleOf: 100
     minimum: 1000
     $ref: /schemas/types.yaml#/definitions/uint32
 
@@ -131,7 +131,7 @@ properties:
       J coefficient for temperature equation.
       Default for series 5 = -100
       Default for series 6 = 0
-    multipleOf: 1000
+    multipleOf: 100
     maximum: 0
     $ref: /schemas/types.yaml#/definitions/int32
 
index 72d2e91..2594fa1 100644 (file)
@@ -216,7 +216,6 @@ properties:
     description: Whether to enable burnout current for EXT1.
 
   adi,ext1-burnout-current-nanoamp:
-    $ref: /schemas/types.yaml#/definitions/uint32
     description:
       Burnout current in nanoamps to be applied to EXT1.
     enum: [0, 50, 500, 1000, 10000]
@@ -233,7 +232,6 @@ properties:
     description: Whether to enable burnout current for EXT2.
 
   adi,ext2-burnout-current-nanoamp:
-    $ref: /schemas/types.yaml#/definitions/uint32
     description: Burnout current in nanoamps to be applied to EXT2.
     enum: [0, 50, 500, 1000, 10000]
     default: 0
@@ -249,7 +247,6 @@ properties:
     description: Whether to enable burnout current for VIOUT.
 
   adi,viout-burnout-current-nanoamp:
-    $ref: /schemas/types.yaml#/definitions/uint32
     description: Burnout current in nanoamps to be applied to VIOUT.
     enum: [0, 1000, 10000]
     default: 0
index e84e4f3..3d06db9 100644 (file)
@@ -35,6 +35,7 @@ properties:
               - amlogic,meson-sm1-gpio-intc
               - amlogic,meson-a1-gpio-intc
               - amlogic,meson-s4-gpio-intc
+              - amlogic,c3-gpio-intc
           - const: amlogic,meson-gpio-intc
 
   reg:
index acb2b2a..31cc0c4 100644 (file)
@@ -293,7 +293,7 @@ allOf:
 patternProperties:
   "^mac@[0-1]$":
     type: object
-    additionalProperties: false
+    unevaluatedProperties: false
     allOf:
       - $ref: ethernet-controller.yaml#
     description:
@@ -305,14 +305,9 @@ patternProperties:
       reg:
         maxItems: 1
 
-      phy-handle: true
-
-      phy-mode: true
-
     required:
       - reg
       - compatible
-      - phy-handle
 
 required:
   - compatible
index 176ea5f..7f324c6 100644 (file)
@@ -91,12 +91,18 @@ properties:
     $ref: /schemas/types.yaml#/definitions/phandle
 
   tx_delay:
-    description: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as default.
+    description: Delay value for TXD timing.
     $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 0x7F
+    default: 0x30
 
   rx_delay:
-    description: Delay value for RXD timing. Range value is 0~0x7F, 0x10 as default.
+    description: Delay value for RXD timing.
     $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 0x7F
+    default: 0x10
 
   phy-supply:
     description: PHY regulator
diff --git a/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml b/Documentation/devicetree/bindings/opp/operating-points-v2-ti-cpu.yaml
new file mode 100644 (file)
index 0000000..02d1d2c
--- /dev/null
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/operating-points-v2-ti-cpu.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: TI CPU OPP (Operating Performance Points)
+
+description:
+  On TI SoCs, like those in the AM335x, AM437x, AM57xx, AM62x, and
+  DRA7xx families, the subset of supported CPU frequencies and the
+  voltage of each OPP vary based on the silicon variant in use. The
+  data sheet sections corresponding to "Operating Performance Points"
+  describe the frequency and voltage values based on the device type
+  and the speed bin information blown into the corresponding eFuse
+  bits, as described in the Technical Reference Manual.
+
+  This document extends the operating-points-v2 binding by providing
+  the hardware description for the scheme mentioned above.
+
+maintainers:
+  - Nishanth Menon <nm@ti.com>
+
+allOf:
+  - $ref: opp-v2-base.yaml#
+
+properties:
+  compatible:
+    const: operating-points-v2-ti-cpu
+
+  syscon:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description: |
+      Phandle to the syscon node representing the control module
+      register space of the SoC.
+
+  opp-shared: true
+
+patternProperties:
+  '^opp(-?[0-9]+)*$':
+    type: object
+    additionalProperties: false
+
+    properties:
+      clock-latency-ns: true
+      opp-hz: true
+      opp-microvolt: true
+      opp-supported-hw: true
+      opp-suspend: true
+      turbo-mode: true
+
+    required:
+      - opp-hz
+      - opp-supported-hw
+
+required:
+  - compatible
+  - syscon
+
+additionalProperties: false
+
+examples:
+  - |
+    opp-table {
+        compatible = "operating-points-v2-ti-cpu";
+        syscon = <&scm_conf>;
+
+        opp-300000000 {
+            opp-hz = /bits/ 64 <300000000>;
+            opp-microvolt = <1100000 1078000 1122000>;
+            opp-supported-hw = <0x06 0x0020>;
+            opp-suspend;
+        };
+
+        opp-500000000 {
+            opp-hz = /bits/ 64 <500000000>;
+            opp-microvolt = <1100000 1078000 1122000>;
+            opp-supported-hw = <0x01 0xFFFF>;
+        };
+
+        opp-600000000 {
+            opp-hz = /bits/ 64 <600000000>;
+            opp-microvolt = <1100000 1078000 1122000>;
+            opp-supported-hw = <0x06 0x0040>;
+        };
+
+        opp-1000000000 {
+            opp-hz = /bits/ 64 <1000000000>;
+            opp-microvolt = <1325000 1298500 1351500>;
+            opp-supported-hw = <0x04 0x0200>;
+        };
+    };
index 47e6f36..e2f8f7a 100644 (file)
@@ -56,7 +56,7 @@ patternProperties:
           need to be configured and that is left for the implementation
           specific binding.
         minItems: 1
-        maxItems: 16
+        maxItems: 32
         items:
           maxItems: 1
 
diff --git a/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml b/Documentation/devicetree/bindings/opp/ti,omap-opp-supply.yaml
new file mode 100644 (file)
index 0000000..693f225
--- /dev/null
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/opp/ti,omap-opp-supply.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments OMAP compatible OPP supply
+
+description:
+  OMAP5, DRA7, and AM57 families of SoCs have Class 0 AVS eFuse
+  registers, which contain OPP-specific voltage information tailored
+  for the specific device. This binding provides the information
+  needed to describe such hardware values and to use them to program
+  the primary regulator during an OPP transition.
+
+  Also, some supplies may have an associated vbb-supply, an Adaptive
+  Body Bias regulator, which must transition in a specific sequence
+  w.r.t the vdd-supply and clk when making an OPP transition. By
+  supplying two regulators to the device that will undergo OPP
+  transitions, we can use the multi-regulator support implemented by
+  the OPP core to describe both regulators the platform needs. The
+  OPP core binding Documentation/devicetree/bindings/opp/opp-v2.yaml
+  provides further information (refer to Example 4 Handling multiple
+  regulators).
+
+maintainers:
+  - Nishanth Menon <nm@ti.com>
+
+properties:
+  $nodename:
+    pattern: '^opp-supply(@[0-9a-f]+)?$'
+
+  compatible:
+    oneOf:
+      - description: Basic OPP supply controlling VDD and VBB
+        const: ti,omap-opp-supply
+      - description: OMAP5+ optimized voltages in eFuse (Class 0) for VDD
+          along with VBB.
+        const: ti,omap5-opp-supply
+      - description: OMAP5+ optimized voltages in eFuse (Class 0) for VDD
+          but no VBB.
+        const: ti,omap5-core-opp-supply
+
+  reg:
+    maxItems: 1
+
+  ti,absolute-max-voltage-uv:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Absolute maximum voltage for the OPP supply in micro-volts.
+    minimum: 750000
+    maximum: 1500000
+
+  ti,efuse-settings:
+    description: An array of u32 tuple items providing information about
+      optimized efuse configuration.
+    minItems: 1
+    $ref: /schemas/types.yaml#/definitions/uint32-matrix
+    items:
+      items:
+        - description: Reference voltage in micro-volts (OPP Voltage)
+          minimum: 750000
+          maximum: 1500000
+          multipleOf: 10000
+        - description: efuse offset where the optimized voltage is located
+          multipleOf: 4
+          maximum: 256
+
+required:
+  - compatible
+  - ti,absolute-max-voltage-uv
+
+allOf:
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              const: ti,omap-opp-supply
+    then:
+      required:
+        - reg
+        - ti,efuse-settings
+
+additionalProperties: false
+
+examples:
+  - |
+    opp-supply {
+        compatible = "ti,omap-opp-supply";
+        ti,absolute-max-voltage-uv = <1375000>;
+    };
+  - |
+    opp-supply@4a003b20 {
+        compatible = "ti,omap5-opp-supply";
+        reg = <0x4a003b20 0x8>;
+        ti,efuse-settings =
+            /* uV   offset */
+            <1060000 0x0>,
+            <1160000 0x4>,
+            <1210000 0x8>;
+        ti,absolute-max-voltage-uv = <1500000>;
+    };
diff --git a/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt b/Documentation/devicetree/bindings/opp/ti-omap5-opp-supply.txt
deleted file mode 100644 (file)
index b70d326..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-Texas Instruments OMAP compatible OPP supply description
-
-OMAP5, DRA7, and AM57 family of SoCs have Class0 AVS eFuse registers which
-contain data that can be used to adjust voltages programmed for some of their
-supplies for more efficient operation. This binding provides the information
-needed to read these values and use them to program the main regulator during
-an OPP transitions.
-
-Also, some supplies may have an associated vbb-supply which is an Adaptive Body
-Bias regulator which much be transitioned in a specific sequence with regards
-to the vdd-supply and clk when making an OPP transition. By supplying two
-regulators to the device that will undergo OPP transitions we can make use
-of the multi regulator binding that is part of the OPP core described here [1]
-to describe both regulators needed by the platform.
-
-[1] Documentation/devicetree/bindings/opp/opp-v2.yaml
-
-Required Properties for Device Node:
-- vdd-supply: phandle to regulator controlling VDD supply
-- vbb-supply: phandle to regulator controlling Body Bias supply
-             (Usually Adaptive Body Bias regulator)
-
-Required Properties for opp-supply node:
-- compatible: Should be one of:
-       "ti,omap-opp-supply" - basic OPP supply controlling VDD and VBB
-       "ti,omap5-opp-supply" - OMAP5+ optimized voltages in efuse(class0)VDD
-                           along with VBB
-       "ti,omap5-core-opp-supply" - OMAP5+ optimized voltages in efuse(class0) VDD
-                           but no VBB.
-- reg: Address and length of the efuse register set for the device (mandatory
-       only for "ti,omap5-opp-supply")
-- ti,efuse-settings: An array of u32 tuple items providing information about
-       optimized efuse configuration. Each item consists of the following:
-       volt: voltage in uV - reference voltage (OPP voltage)
-       efuse_offseet: efuse offset from reg where the optimized voltage is stored.
-- ti,absolute-max-voltage-uv: absolute maximum voltage for the OPP supply.
-
-Example:
-
-/* Device Node (CPU)  */
-cpus {
-       cpu0: cpu@0 {
-               device_type = "cpu";
-
-               ...
-
-               vdd-supply = <&vcc>;
-               vbb-supply = <&abb_mpu>;
-       };
-};
-
-/* OMAP OPP Supply with Class0 registers */
-opp_supply_mpu: opp_supply@4a003b20 {
-       compatible = "ti,omap5-opp-supply";
-       reg = <0x4a003b20 0x8>;
-       ti,efuse-settings = <
-       /* uV   offset */
-       1060000 0x0
-       1160000 0x4
-       1210000 0x8
-       >;
-       ti,absolute-max-voltage-uv = <1500000>;
-};
index e608a4f..e119a22 100644 (file)
@@ -87,7 +87,7 @@ $defs:
                 emac0_mdc, emac0_mdio, emac0_ptp_aux, emac0_ptp_pps, emac1_mcg0,
                 emac1_mcg1, emac1_mcg2, emac1_mcg3, emac1_mdc, emac1_mdio,
                 emac1_ptp_aux, emac1_ptp_pps, gcc_gp1, gcc_gp2, gcc_gp3,
-                gcc_gp4, gcc_gp5, hs0_mi2s, hs1_mi2s, hs2_mi2s, ibi_i3c,
+                gcc_gp4, gcc_gp5, gpio, hs0_mi2s, hs1_mi2s, hs2_mi2s, ibi_i3c,
                 jitter_bist, mdp0_vsync0, mdp0_vsync1, mdp0_vsync2, mdp0_vsync3,
                 mdp0_vsync4, mdp0_vsync5, mdp0_vsync6, mdp0_vsync7, mdp0_vsync8,
                 mdp1_vsync0, mdp1_vsync1, mdp1_vsync2, mdp1_vsync3, mdp1_vsync4,
index 30b2131..65cb2e5 100644 (file)
@@ -16,7 +16,6 @@ properties:
       - enum:
           - atmel,at91rm9200-usart
           - atmel,at91sam9260-usart
-          - microchip,sam9x60-usart
       - items:
           - const: atmel,at91rm9200-dbgu
           - const: atmel,at91rm9200-usart
@@ -24,6 +23,9 @@ properties:
           - const: atmel,at91sam9260-dbgu
           - const: atmel,at91sam9260-usart
       - items:
+          - const: microchip,sam9x60-usart
+          - const: atmel,at91sam9260-usart
+      - items:
           - const: microchip,sam9x60-dbgu
           - const: microchip,sam9x60-usart
           - const: atmel,at91sam9260-dbgu
diff --git a/Documentation/devicetree/bindings/serial/cavium-uart.txt b/Documentation/devicetree/bindings/serial/cavium-uart.txt
deleted file mode 100644 (file)
index 87a6c37..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-* Universal Asynchronous Receiver/Transmitter (UART)
-
-- compatible: "cavium,octeon-3860-uart"
-
-  Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
-
-- reg: The base address of the UART register bank.
-
-- interrupts: A single interrupt specifier.
-
-- current-speed: Optional, the current bit rate in bits per second.
-
-Example:
-       uart1: serial@1180000000c00 {
-               compatible = "cavium,octeon-3860-uart","ns16550";
-               reg = <0x11800 0x00000c00 0x0 0x400>;
-               current-speed = <115200>;
-               interrupts = <0 35>;
-       };
diff --git a/Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt b/Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt
deleted file mode 100644 (file)
index 04e23e6..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-* NXP LPC1850 UART
-
-Required properties:
-- compatible   : "nxp,lpc1850-uart", "ns16550a".
-- reg          : offset and length of the register set for the device.
-- interrupts   : should contain uart interrupt.
-- clocks       : phandle to the input clocks.
-- clock-names  : required elements: "uartclk", "reg".
-
-Optional properties:
-- dmas         : Two or more DMA channel specifiers following the
-                 convention outlined in bindings/dma/dma.txt
-- dma-names    : Names for the dma channels, if present. There must
-                 be at least one channel named "tx" for transmit
-                 and named "rx" for receive.
-
-Since it's also possible to also use the of_serial.c driver all
-parameters from 8250.txt also apply but are optional.
-
-Example:
-uart0: serial@40081000 {
-       compatible = "nxp,lpc1850-uart", "ns16550a";
-       reg = <0x40081000 0x1000>;
-       reg-shift = <2>;
-       interrupts = <24>;
-       clocks = <&ccu2 CLK_APB0_UART0>, <&ccu1 CLK_CPU_UART0>;
-       clock-names = "uartclk", "reg";
-};
index 3de7b36..d3ce4de 100644 (file)
@@ -39,22 +39,4 @@ required:
 
 additionalProperties: false
 
-examples:
-  - |
-    sound {
-        compatible = "audio-graph-card2";
-
-        links = <&cpu_port>;
-    };
-
-    cpu {
-        compatible = "cpu-driver";
-
-        cpu_port: port { cpu_ep: endpoint { remote-endpoint = <&codec_ep>; }; };
-    };
-
-    codec {
-        compatible = "codec-driver";
-
-        port { codec_ep: endpoint { remote-endpoint = <&cpu_ep>; }; };
-    };
+...
index 666a95a..ba5b772 100644 (file)
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Google SC7180-Trogdor ASoC sound card driver
 
 maintainers:
-  - Rohit kumar <rohitkr@codeaurora.org>
+  - Rohit kumar <quic_rohkumar@quicinc.com>
   - Cheng-Yi Chiang <cychiang@chromium.org>
 
 description:
index 6cc8f86..3a559bd 100644 (file)
@@ -8,7 +8,7 @@ title: Qualcomm Technologies Inc. LPASS CPU dai driver
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
-  - Rohit kumar <rohitkr@codeaurora.org>
+  - Rohit kumar <quic_rohkumar@quicinc.com>
 
 description: |
   Qualcomm Technologies Inc. SOC Low-Power Audio SubSystem (LPASS) that consist
diff --git a/Documentation/devicetree/bindings/watchdog/loongson,ls1x-wdt.yaml b/Documentation/devicetree/bindings/watchdog/loongson,ls1x-wdt.yaml
new file mode 100644 (file)
index 0000000..81690d4
--- /dev/null
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/loongson,ls1x-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Loongson-1 Watchdog Timer
+
+maintainers:
+  - Keguang Zhang <keguang.zhang@gmail.com>
+
+allOf:
+  - $ref: watchdog.yaml#
+
+properties:
+  compatible:
+    enum:
+      - loongson,ls1b-wdt
+      - loongson,ls1c-wdt
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/loongson,ls1x-clk.h>
+    watchdog: watchdog@1fe5c060 {
+        compatible = "loongson,ls1b-wdt";
+        reg = <0x1fe5c060 0xc>;
+
+        clocks = <&clkc LS1X_CLKID_APB>;
+    };
index 5158577..8c0845c 100644 (file)
@@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well,
 although they are not the focus of this document.
 
 Some additional information can also be found in the kernel source under
-Documentation/s390/driver-model.rst.
+Documentation/arch/s390/driver-model.rst.
 
 The css bus
 ===========
@@ -38,7 +38,7 @@ into several categories:
 * Standard I/O subchannels, for use by the system. They have a child
   device on the ccw bus and are described below.
 * I/O subchannels bound to the vfio-ccw driver. See
-  Documentation/s390/vfio-ccw.rst.
+  Documentation/arch/s390/vfio-ccw.rst.
 * Message subchannels. No Linux driver currently exists.
 * CHSC subchannels (at most one). The chsc subchannel driver can be used
   to send asynchronous chsc commands.
index eccd327..a624e92 100644 (file)
@@ -332,54 +332,121 @@ Encryption modes and usage
 fscrypt allows one encryption mode to be specified for file contents
 and one encryption mode to be specified for filenames.  Different
 directory trees are permitted to use different encryption modes.
+
+Supported modes
+---------------
+
 Currently, the following pairs of encryption modes are supported:
 
 - AES-256-XTS for contents and AES-256-CTS-CBC for filenames
-- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
+- AES-256-XTS for contents and AES-256-HCTR2 for filenames
 - Adiantum for both contents and filenames
-- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
-- SM4-XTS for contents and SM4-CTS-CBC for filenames (v2 policies only)
-
-If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
-
-AES-128-CBC was added only for low-powered embedded devices with
-crypto accelerators such as CAAM or CESA that do not support XTS.  To
-use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or
-another SHA-256 implementation) must be enabled so that ESSIV can be
-used.
-
-Adiantum is a (primarily) stream cipher-based mode that is fast even
-on CPUs without dedicated crypto instructions.  It's also a true
-wide-block mode, unlike XTS.  It can also eliminate the need to derive
-per-file encryption keys.  However, it depends on the security of two
-primitives, XChaCha12 and AES-256, rather than just one.  See the
-paper "Adiantum: length-preserving encryption for entry-level
-processors" (https://eprint.iacr.org/2018/720.pdf) for more details.
-To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled.  Also, fast
-implementations of ChaCha and NHPoly1305 should be enabled, e.g.
-CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
-
-AES-256-HCTR2 is another true wide-block encryption mode that is intended for
-use on CPUs with dedicated crypto instructions.  AES-256-HCTR2 has the property
-that a bitflip in the plaintext changes the entire ciphertext.  This property
-makes it desirable for filename encryption since initialization vectors are
-reused within a directory.  For more details on AES-256-HCTR2, see the paper
-"Length-preserving encryption with HCTR2"
-(https://eprint.iacr.org/2021/1441.pdf).  To use AES-256-HCTR2,
-CONFIG_CRYPTO_HCTR2 must be enabled.  Also, fast implementations of XCTR and
-POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
-CRYPTO_AES_ARM64_CE_BLK for ARM64.
-
-SM4 is a Chinese block cipher that is an alternative to AES.  It has
-not seen as much security review as AES, and it only has a 128-bit key
-size.  It may be useful in cases where its use is mandated.
-Otherwise, it should not be used.  For SM4 support to be available, it
-also needs to be enabled in the kernel crypto API.
-
-New encryption modes can be added relatively easily, without changes
-to individual filesystems.  However, authenticated encryption (AE)
-modes are not currently supported because of the difficulty of dealing
-with ciphertext expansion.
+- AES-128-CBC-ESSIV for contents and AES-128-CTS-CBC for filenames
+- SM4-XTS for contents and SM4-CTS-CBC for filenames
+
+Authenticated encryption modes are not currently supported because of
+the difficulty of dealing with ciphertext expansion.  Therefore,
+contents encryption uses a block cipher in `XTS mode
+<https://en.wikipedia.org/wiki/Disk_encryption_theory#XTS>`_ or
+`CBC-ESSIV mode
+<https://en.wikipedia.org/wiki/Disk_encryption_theory#Encrypted_salt-sector_initialization_vector_(ESSIV)>`_,
+or a wide-block cipher.  Filenames encryption uses a
+block cipher in `CTS-CBC mode
+<https://en.wikipedia.org/wiki/Ciphertext_stealing>`_ or a wide-block
+cipher.
+
+The (AES-256-XTS, AES-256-CTS-CBC) pair is the recommended default.
+It is also the only option that is *guaranteed* to always be supported
+if the kernel supports fscrypt at all; see `Kernel config options`_.
+
+The (AES-256-XTS, AES-256-HCTR2) pair is also a good choice that
+upgrades the filenames encryption to use a wide-block cipher.  (A
+*wide-block cipher*, also called a tweakable super-pseudorandom
+permutation, has the property that changing one bit scrambles the
+entire result.)  As described in `Filenames encryption`_, a wide-block
+cipher is the ideal mode for the problem domain, though CTS-CBC is the
+"least bad" choice among the alternatives.  For more information about
+HCTR2, see `the HCTR2 paper <https://eprint.iacr.org/2021/1441.pdf>`_.
+
+Adiantum is recommended on systems where AES is too slow due to lack
+of hardware acceleration for AES.  Adiantum is a wide-block cipher
+that uses XChaCha12 and AES-256 as its underlying components.  Most of
+the work is done by XChaCha12, which is much faster than AES when AES
+acceleration is unavailable.  For more information about Adiantum, see
+`the Adiantum paper <https://eprint.iacr.org/2018/720.pdf>`_.
+
+The (AES-128-CBC-ESSIV, AES-128-CTS-CBC) pair exists only to support
+systems whose only form of AES acceleration is an off-CPU crypto
+accelerator such as CAAM or CESA that does not support XTS.
+
+The remaining mode pairs are the "national pride ciphers":
+
+- (SM4-XTS, SM4-CTS-CBC)
+
+Generally speaking, these ciphers aren't "bad" per se, but they
+receive limited security review compared to the usual choices such as
+AES and ChaCha.  They also don't bring much new to the table.  It is
+suggested to only use these ciphers where their use is mandated.
+
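+For illustration, a minimal user-space sketch that applies the
+recommended default pair via FS_IOC_SET_ENCRYPTION_POLICY (error
+handling is elided, and the key identifier must come from a key
+previously added with FS_IOC_ADD_ENCRYPTION_KEY)::
+
+    #include <string.h>
+    #include <sys/ioctl.h>
+    #include <linux/fscrypt.h>
+
+    /* Apply the (AES-256-XTS, AES-256-CTS-CBC) pair to an empty directory. */
+    int set_default_policy(int dirfd,
+                           const __u8 key_id[FSCRYPT_KEY_IDENTIFIER_SIZE])
+    {
+            struct fscrypt_policy_v2 policy = { 0 };
+
+            policy.version = FSCRYPT_POLICY_V2;
+            policy.contents_encryption_mode = FSCRYPT_MODE_AES_256_XTS;
+            policy.filenames_encryption_mode = FSCRYPT_MODE_AES_256_CTS;
+            policy.flags = FSCRYPT_POLICY_FLAGS_PAD_32;
+            memcpy(policy.master_key_identifier, key_id,
+                   FSCRYPT_KEY_IDENTIFIER_SIZE);
+
+            return ioctl(dirfd, FS_IOC_SET_ENCRYPTION_POLICY, &policy);
+    }
+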
+Kernel config options
+---------------------
+
+Enabling fscrypt support (CONFIG_FS_ENCRYPTION) automatically pulls in
+only the basic support from the crypto API needed to use AES-256-XTS
+and AES-256-CTS-CBC encryption.  For optimal performance, it is
+strongly recommended to also enable any available platform-specific
+kconfig options that provide acceleration for the algorithm(s) you
+wish to use.  Support for any "non-default" encryption modes typically
+requires extra kconfig options as well.
+
+Below, some relevant options are listed by encryption mode.  Note,
+acceleration options not listed below may be available for your
+platform; refer to the kconfig menus.  File contents encryption can
+also be configured to use inline encryption hardware instead of the
+kernel crypto API (see `Inline encryption support`_); in that case,
+the file contents mode doesn't need to be supported in the kernel
+crypto API, but the filenames mode still does.
+
+- AES-256-XTS and AES-256-CTS-CBC
+    - Recommended:
+        - arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
+        - x86: CONFIG_CRYPTO_AES_NI_INTEL
+
+- AES-256-HCTR2
+    - Mandatory:
+        - CONFIG_CRYPTO_HCTR2
+    - Recommended:
+        - arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
+        - arm64: CONFIG_CRYPTO_POLYVAL_ARM64_CE
+        - x86: CONFIG_CRYPTO_AES_NI_INTEL
+        - x86: CONFIG_CRYPTO_POLYVAL_CLMUL_NI
+
+- Adiantum
+    - Mandatory:
+        - CONFIG_CRYPTO_ADIANTUM
+    - Recommended:
+        - arm32: CONFIG_CRYPTO_CHACHA20_NEON
+        - arm32: CONFIG_CRYPTO_NHPOLY1305_NEON
+        - arm64: CONFIG_CRYPTO_CHACHA20_NEON
+        - arm64: CONFIG_CRYPTO_NHPOLY1305_NEON
+        - x86: CONFIG_CRYPTO_CHACHA20_X86_64
+        - x86: CONFIG_CRYPTO_NHPOLY1305_SSE2
+        - x86: CONFIG_CRYPTO_NHPOLY1305_AVX2
+
+- AES-128-CBC-ESSIV and AES-128-CTS-CBC:
+    - Mandatory:
+        - CONFIG_CRYPTO_ESSIV
+        - CONFIG_CRYPTO_SHA256 or another SHA-256 implementation
+    - Recommended:
+        - AES-CBC acceleration
+
+fscrypt also uses HMAC-SHA512 for key derivation, so enabling SHA-512
+acceleration is recommended:
+
+- SHA-512
+    - Recommended:
+        - arm64: CONFIG_CRYPTO_SHA512_ARM64_CE
+        - x86: CONFIG_CRYPTO_SHA512_SSSE3
 
 Contents encryption
 -------------------
@@ -493,7 +560,14 @@ This structure must be initialized as follows:
   be set to constants from ``<linux/fscrypt.h>`` which identify the
   encryption modes to use.  If unsure, use FSCRYPT_MODE_AES_256_XTS
   (1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
-  (4) for ``filenames_encryption_mode``.
+  (4) for ``filenames_encryption_mode``.  For details, see `Encryption
+  modes and usage`_.
+
+  v1 encryption policies only support three combinations of modes:
+  (FSCRYPT_MODE_AES_256_XTS, FSCRYPT_MODE_AES_256_CTS),
+  (FSCRYPT_MODE_AES_128_CBC, FSCRYPT_MODE_AES_128_CTS), and
+  (FSCRYPT_MODE_ADIANTUM, FSCRYPT_MODE_ADIANTUM).  v2 policies support
+  all combinations documented in `Supported modes`_.
 
 - ``flags`` contains optional flags from ``<linux/fscrypt.h>``:
 
index ad6d216..d095c58 100644 (file)
@@ -146,9 +146,10 @@ For the rest of this document we will prefix all userspace ids with ``u`` and
 all kernel ids with ``k``. Ranges of idmappings will be prefixed with ``r``. So
 an idmapping will be written as ``u0:k10000:r10000``.
 
-For example, the id ``u1000`` is an id in the upper idmapset or "userspace
-idmapset" starting with ``u1000``. And it is mapped to ``k11000`` which is a
-kernel id in the lower idmapset or "kernel idmapset" starting with ``k10000``.
+For example, within this idmapping, the id ``u1000`` is an id in the upper
+idmapset or "userspace idmapset" starting with ``u0``. And it is mapped to
+``k11000`` which is a kernel id in the lower idmapset or "kernel idmapset"
+starting with ``k10000``.
 
 A kernel id is always created by an idmapping. Such idmappings are associated
 with user namespaces. Since we mainly care about how idmappings work we're not
@@ -373,6 +374,13 @@ kernel maps the caller's userspace id down into a kernel id according to the
 caller's idmapping and then maps that kernel id up according to the
 filesystem's idmapping.
 
+From an implementation point of view, it's worth mentioning how idmappings
+are represented.  All idmappings are taken from the corresponding user
+namespace:
+
+    - caller's idmapping (usually taken from ``current_user_ns()``)
+    - filesystem's idmapping (``sb->s_user_ns``)
+    - mount's idmapping (``mnt_idmap(vfsmnt)``)
+
 Let's see some examples with caller/filesystem idmapping but without mount
 idmappings. This will exhibit some problems we can hit. After that we will
 revisit/reconsider these examples, this time using mount idmappings, to see how
index ed14891..b7e5a38 100644 (file)
@@ -85,13 +85,14 @@ prototypes::
                            struct dentry *dentry, struct fileattr *fa);
        int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
        struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
+       struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 
 locking rules:
        all may block
 
-============== =============================================
+============== ==================================================
 ops            i_rwsem(inode)
-============== =============================================
+============== ==================================================
 lookup:                shared
 create:                exclusive
 link:          exclusive (both)
@@ -115,7 +116,8 @@ atomic_open:        shared (exclusive if O_CREAT is set in open flags)
 tmpfile:       no
 fileattr_get:  no or exclusive
 fileattr_set:  exclusive
-============== =============================================
+get_offset_ctx: no
+============== ==================================================
 
 
        Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
@@ -374,10 +376,17 @@ invalidate_lock before invalidating page cache in truncate / hole punch
 path (and thus calling into ->invalidate_folio) to block races between page
 cache invalidation and page cache filling functions (fault, read, ...).
 
-->release_folio() is called when the kernel is about to try to drop the
-buffers from the folio in preparation for freeing it.  It returns false to
-indicate that the buffers are (or may be) freeable.  If ->release_folio is
-NULL, the kernel assumes that the fs has no private interest in the buffers.
+->release_folio() is called when the MM wants to make a change to the
+folio that would invalidate the filesystem's private data.  For example,
+it may be about to be removed from the address_space or split.  The folio
+is locked and not under writeback.  It may be dirty.  The gfp parameter
+is not usually used for allocation, but rather to indicate what the
+filesystem may do to attempt to free the private data.  The filesystem may
+return false to indicate that the folio's private data cannot be freed.
+If it returns true, it should have already removed the private data from
+the folio.  If a filesystem does not provide a ->release_folio method,
+the pagecache will assume that private data is buffer_heads and call
+try_to_free_buffers().
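
A hedged sketch of what an instance might look like (the ``foo_*`` names
are hypothetical; the flow follows the contract described above)::

    static bool foo_release_folio(struct folio *folio, gfp_t gfp)
    {
            struct foo_private *priv = folio_get_private(folio);

            if (foo_private_in_use(priv))   /* hypothetical busy check */
                    return false;           /* private data not freeable */

            folio_detach_private(folio);    /* private data is gone now */
            foo_free_private(priv);
            return true;
    }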
 
 ->free_folio() is called when the kernel has dropped the folio
 from the page cache.
@@ -551,9 +560,8 @@ mutex or just to use i_size_read() instead.
 Note: this does not protect the file->f_pos against concurrent modifications
 since this is something the userspace has to take care about.
 
-->iterate() is called with i_rwsem exclusive.
-
-->iterate_shared() is called with i_rwsem at least shared.
+->iterate_shared() is called with i_rwsem held for reading, and with the
+file's f_pos_lock held exclusively.
 
 ->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
 Most instances call fasync_helper(), which does that maintenance, so it's
index d2d684a..0f5da78 100644 (file)
@@ -537,7 +537,7 @@ vfs_readdir() is gone; switch to iterate_dir() instead
 
 **mandatory**
 
-->readdir() is gone now; switch to ->iterate()
+->readdir() is gone now; switch to ->iterate_shared()
 
 **mandatory**
 
@@ -693,24 +693,19 @@ parallel now.
 
 ---
 
-**recommended**
+**mandatory**
 
-->iterate_shared() is added; it's a parallel variant of ->iterate().
+->iterate_shared() is added.
 Exclusion on struct file level is still provided (as well as that
 between it and lseek on the same struct file), but if your directory
 has been opened several times, you can get these called in parallel.
 Exclusion between that method and all directory-modifying ones is
 still provided, of course.
 
-Often enough ->iterate() can serve as ->iterate_shared() without any
-changes - it is a read-only operation, after all.  If you have any
-per-inode or per-dentry in-core data structures modified by ->iterate(),
-you might need something to serialize the access to them.  If you
-do dcache pre-seeding, you'll need to switch to d_alloc_parallel() for
-that; look for in-tree examples.
-
-Old method is only used if the new one is absent; eventually it will
-be removed.  Switch while you still can; the old one won't stay.
+If you have any per-inode or per-dentry in-core data structures modified
+by ->iterate_shared(), you might need something to serialize the access
+to them.  If you do dcache pre-seeding, you'll need to switch to
+d_alloc_parallel() for that; look for in-tree examples.
 
 ---
 
@@ -930,9 +925,9 @@ should be done by looking at FMODE_LSEEK in file->f_mode.
 filldir_t (readdir callbacks) calling conventions have changed.  Instead of
 returning 0 or -E... it returns bool now.  false means "no more" (as -E... used
 to) and true - "keep going" (as 0 in old calling conventions).  Rationale:
-callers never looked at specific -E... values anyway.  ->iterate() and
-->iterate_shared() instance require no changes at all, all filldir_t ones in
-the tree converted.
+callers never looked at specific -E... values anyway.  ->iterate_shared()
+instances require no changes at all; all filldir_t instances in the tree
+have been converted.
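
A hedged sketch of the new convention (the ``foo_*`` names and the space
accounting are made up)::

    struct foo_ctx {
            struct dir_context ctx;   /* must be first for container_of() */
            unsigned int room;        /* hypothetical space left in buffer */
    };

    static bool foo_fill_dir(struct dir_context *ctx, const char *name,
                             int namelen, loff_t offset, u64 ino,
                             unsigned int d_type)
    {
            struct foo_ctx *fc = container_of(ctx, struct foo_ctx, ctx);

            if (fc->room < namelen)
                    return false;   /* "no more" - where -E... was returned */

            /* ... emit the entry here ... */
            fc->room -= namelen;
            return true;            /* "keep going" - where 0 was returned */
    }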
 
 ---
 
index f18f46b..56a26c8 100644 (file)
@@ -21,8 +21,8 @@ explained further below, some of which can be reconfigured dynamically on the
 fly using a remount ('mount -o remount ...') of the filesystem. A tmpfs
 filesystem can be resized but it cannot be resized to a size below its current
 usage. tmpfs also supports POSIX ACLs, and extended attributes for the
-trusted.* and security.* namespaces. ramfs does not use swap and you cannot
-modify any parameter for a ramfs filesystem. The size limit of a ramfs
+trusted.*, security.* and user.* namespaces. ramfs does not use swap and you
+cannot modify any parameter for a ramfs filesystem. The size limit of a ramfs
 filesystem is how much memory you have available, and so care must be taken if
 used so as not to run out of memory.
 
@@ -84,8 +84,6 @@ nr_inodes  The maximum number of inodes for this instance. The default
            is half of the number of your physical RAM pages, or (on a
            machine with highmem) the number of lowmem RAM pages,
            whichever is the lower.
-noswap     Disables swap. Remounts must respect the original settings.
-           By default swap is enabled.
 =========  ============================================================
 
 These parameters accept a suffix k, m or g for kilo, mega and giga and
@@ -99,36 +97,65 @@ mount with such options, since it allows any user with write access to
 use up all the memory on the machine; but enhances the scalability of
 that instance in a system with many CPUs making intensive use of it.
 
+If nr_inodes is not 0, that limited space for inodes is also used up by
+extended attributes: "df -i"'s IUsed and IUse% increase, IFree decreases.
+
+tmpfs blocks may be swapped out when there is a shortage of memory.
+tmpfs has a mount option to disable its use of swap:
+
+======  ===========================================================
+noswap  Disables swap. Remounts must respect the original settings.
+        By default swap is enabled.
+======  ===========================================================
+
 tmpfs also supports Transparent Huge Pages which requires a kernel
 configured with CONFIG_TRANSPARENT_HUGEPAGE and with huge pages supported for
 your system (has_transparent_hugepage(), which is architecture specific).
 The mount options for this are:
 
-======  ============================================================
-huge=0  never: disables huge pages for the mount
-huge=1  always: enables huge pages for the mount
-huge=2  within_size: only allocate huge pages if the page will be
-        fully within i_size, also respect fadvise()/madvise() hints.
-huge=3  advise: only allocate huge pages if requested with
-        fadvise()/madvise()
-======  ============================================================
-
-There is a sysfs file which you can also use to control system wide THP
-configuration for all tmpfs mounts, the file is:
-
-/sys/kernel/mm/transparent_hugepage/shmem_enabled
-
-This sysfs file is placed on top of THP sysfs directory and so is registered
-by THP code. It is however only used to control all tmpfs mounts with one
-single knob. Since it controls all tmpfs mounts it should only be used either
-for emergency or testing purposes. The values you can set for shmem_enabled are:
-
-==  ============================================================
--1  deny: disables huge on shm_mnt and all mounts, for
-    emergency use
--2  force: enables huge on shm_mnt and all mounts, w/o needing
-    option, for testing
-==  ============================================================
+================ ==============================================================
+huge=never       Do not allocate huge pages.  This is the default.
+huge=always      Attempt to allocate huge page every time a new page is needed.
+huge=within_size Only allocate huge page if it will be fully within i_size.
+                 Also respect madvise(2) hints.
+huge=advise      Only allocate huge page if requested with madvise(2).
+================ ==============================================================
+
+See also Documentation/admin-guide/mm/transhuge.rst, which describes the
+sysfs file /sys/kernel/mm/transparent_hugepage/shmem_enabled, which can
+be used to deny huge pages on all tmpfs mounts in an emergency, or to
+force huge pages on all tmpfs mounts for testing.
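
For instance, a minimal userspace sketch mounting a tmpfs instance with a
per-mount huge page policy (requires privilege; the mount point and sizes
are illustrative)::

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            if (mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
                      "size=512m,huge=within_size") != 0)
                    perror("mount");
            return 0;
    }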
+
+tmpfs also supports quotas with the following mount options:
+
+======================== =================================================
+quota                    User and group quota accounting and enforcement
+                         is enabled on the mount. Tmpfs uses hidden
+                         system quota files that are initialized on mount.
+usrquota                 User quota accounting and enforcement is enabled
+                         on the mount.
+grpquota                 Group quota accounting and enforcement is enabled
+                         on the mount.
+usrquota_block_hardlimit Set global user quota block hard limit.
+usrquota_inode_hardlimit Set global user quota inode hard limit.
+grpquota_block_hardlimit Set global group quota block hard limit.
+grpquota_inode_hardlimit Set global group quota inode hard limit.
+======================== =================================================
+
+None of the quota-related mount options can be set or changed on remount.
+
+Quota limit parameters accept a suffix k, m or g for kilo, mega and giga
+and can't be changed on remount. Default global quota limits take effect
+for any and all user/group/project except root the first time the quota
+entry for a user/group/project id is accessed - typically the first time
+an inode owned by that id is created after the mount. In other words,
+instead of the limits being initialized to zero, they are initialized
+with the particular value provided with these mount options. The limits
+can be changed for any user/group id at any time, as they normally can
+be.
+
+Note that tmpfs quotas do not support user namespaces, so no uid/gid
+translation is done if quotas are enabled inside user namespaces.
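
As a hedged example, enabling quotas with a default user block hard limit
at mount time might look like this (mount point illustrative)::

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            /* usrquota_block_hardlimit takes k/m/g suffixes, per the
             * table above, and cannot be changed on remount. */
            if (mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
                      "usrquota,grpquota,usrquota_block_hardlimit=1g") != 0)
                    perror("mount");
            return 0;
    }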
 
 tmpfs has a mount option to set the NUMA memory allocation policy for
 all files in that instance (if CONFIG_NUMA is enabled) - which can be
index cb2a97e..f8fe815 100644 (file)
@@ -260,9 +260,11 @@ filesystem.  The following members are defined:
                void (*evict_inode) (struct inode *);
                void (*put_super) (struct super_block *);
                int (*sync_fs)(struct super_block *sb, int wait);
-               int (*freeze_super) (struct super_block *);
+               int (*freeze_super) (struct super_block *sb,
+                                       enum freeze_holder who);
                int (*freeze_fs) (struct super_block *);
-               int (*thaw_super) (struct super_block *);
+               int (*thaw_super) (struct super_block *sb,
+                                       enum freeze_holder who);
                int (*unfreeze_fs) (struct super_block *);
                int (*statfs) (struct dentry *, struct kstatfs *);
                int (*remount_fs) (struct super_block *, int *, char *);
@@ -515,6 +517,7 @@ As of kernel 2.6.22, the following members are defined:
                int (*fileattr_set)(struct mnt_idmap *idmap,
                                    struct dentry *dentry, struct fileattr *fa);
                int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+               struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
        };
 
 Again, all methods are called without any locks being held, unless
@@ -675,7 +678,10 @@ otherwise noted.
        called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
        change miscellaneous file flags and attributes.  Callers hold
        i_rwsem exclusive.  If unset, then fall back to f_op->ioctl().
-
+``get_offset_ctx``
+       called to get the offset context for a directory inode.  A
+       filesystem must define this operation to use
+       simple_offset_dir_operations.
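
A hedged sketch of the wiring (``FOO_I()`` and the ``dir_offsets`` field
are hypothetical; simple_lookup() is shown only to round out the table)::

    static struct offset_ctx *foo_get_offset_ctx(struct inode *inode)
    {
            return &FOO_I(inode)->dir_offsets;  /* per-inode context */
    }

    static const struct inode_operations foo_dir_inode_operations = {
            .lookup         = simple_lookup,
            .get_offset_ctx = foo_get_offset_ctx,
    };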
 
 The Address Space Object
 ========================
index f37fc90..89419e1 100644 (file)
@@ -5,9 +5,8 @@ Chrome OS ACPI Device
 =====================
 
 Hardware functionality specific to Chrome OS is exposed through a Chrome OS ACPI device.
-The plug and play ID of a Chrome OS ACPI device is GGL0001. GGL is a valid PNP ID of Google.
-PNP ID can be used with the ACPI devices according to the guidelines. The following ACPI
-objects are supported:
+The plug and play ID of a Chrome OS ACPI device is GGL0001 and the hardware ID is
+GOOG0016.  The following ACPI objects are supported:
 
 .. flat-table:: Supported ACPI Objects
    :widths: 1 2
index b7d3ae7..41ddc10 100644 (file)
@@ -46,7 +46,7 @@ driver model device node, and its I2C address.
        },
 
        .id_table       = foo_idtable,
-       .probe_new      = foo_probe,
+       .probe          = foo_probe,
        .remove         = foo_remove,
        /* if device autodetection is needed: */
        .class          = I2C_CLASS_SOMETHING,
index a7a0477..7bf7b95 100644 (file)
@@ -65,15 +65,16 @@ argument - drivers can process completions for any number of Tx
 packets but should only process up to ``budget`` number of
 Rx packets. Rx processing is usually much more expensive.
 
-In other words, it is recommended to ignore the budget argument when
-performing TX buffer reclamation to ensure that the reclamation is not
-arbitrarily bounded; however, it is required to honor the budget argument
-for RX processing.
+In other words, for Rx processing the ``budget`` argument limits how many
+packets the driver can process in a single poll. Rx-specific APIs like page
+pool or XDP cannot be used at all when ``budget`` is 0.
+skb Tx processing should happen regardless of the ``budget``, but if
+the argument is 0 the driver cannot call any XDP (or page pool) APIs.
 
 .. warning::
 
-   The ``budget`` argument may be 0 if core tries to only process Tx completions
-   and no Rx packets.
+   The ``budget`` argument may be 0 if core tries to only process
+   skb Tx completions and no Rx or XDP packets.
 
 The poll method returns the amount of work done. If the driver still
 has outstanding work to do (e.g. ``budget`` was exhausted)
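
A hedged sketch of a poll method honoring these rules (the ``foo_*``
helpers are hypothetical)::

    static int foo_poll(struct napi_struct *napi, int budget)
    {
            struct foo_ring *ring = container_of(napi, struct foo_ring,
                                                 napi);
            int work_done = 0;

            /* skb Tx reclaim may run even when budget == 0, but must not
             * touch XDP or page pool APIs in that case. */
            foo_clean_tx_irq(ring, budget);

            if (budget)
                    work_done = foo_clean_rx_irq(ring, budget);

            if (work_done < budget)
                    napi_complete_done(napi, work_done);

            return work_done;
    }
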
index 8b1045c..c383a39 100644 (file)
@@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
        Default is set to (hb_interval * path_max_retrans + rto_max)
 
 nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
-       default 0.3
+       default 3
 
 nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
-       default 0.3
+       default 3
 
 nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
        default 3
index df97812..cb68623 100644 (file)
@@ -254,7 +254,6 @@ an involved disclosed party. The current ambassadors list:
   Samsung      Javier González <javier.gonz@samsung.com>
 
   Microsoft    James Morris <jamorris@linux.microsoft.com>
-  VMware
   Xen          Andrew Cooper <andrew.cooper3@citrix.com>
 
   Canonical    John Johansen <john.johansen@canonical.com>
@@ -263,10 +262,8 @@ an involved disclosed party. The current ambassadors list:
   Red Hat      Josh Poimboeuf <jpoimboe@redhat.com>
   SUSE         Jiri Kosina <jkosina@suse.cz>
 
-  Amazon
   Google       Kees Cook <keescook@chromium.org>
 
-  GCC
   LLVM         Nick Desaulniers <ndesaulniers@google.com>
   ============= ========================================================
 
index 2397b31..2ab843c 100644 (file)
@@ -98,7 +98,7 @@ If you aren't subscribed to netdev and/or are simply unsure if
 repository link above for any new networking-related commits.  You may
 also check the following website for the current status:
 
-  http://vger.kernel.org/~davem/net-next.html
+  https://patchwork.hopto.org/net-next.html
 
 The ``net`` tree continues to collect fixes for the vX.Y content, and is
 fed back to Linus at regular (~weekly) intervals.  Meaning that the
index 82e2983..5a69937 100644 (file)
@@ -63,31 +63,28 @@ information submitted to the security list and any followup discussions
 of the report are treated confidentially even after the embargo has been
 lifted, in perpetuity.
 
-Coordination
-------------
-
-Fixes for sensitive bugs, such as those that might lead to privilege
-escalations, may need to be coordinated with the private
-<linux-distros@vs.openwall.org> mailing list so that distribution vendors
-are well prepared to issue a fixed kernel upon public disclosure of the
-upstream fix. Distros will need some time to test the proposed patch and
-will generally request at least a few days of embargo, and vendor update
-publication prefers to happen Tuesday through Thursday. When appropriate,
-the security team can assist with this coordination, or the reporter can
-include linux-distros from the start. In this case, remember to prefix
-the email Subject line with "[vs]" as described in the linux-distros wiki:
-<http://oss-security.openwall.org/wiki/mailing-lists/distros#how-to-use-the-lists>
+Coordination with other groups
+------------------------------
+
+The kernel security team strongly recommends that reporters of potential
+security issues NEVER contact the "linux-distros" mailing list until
+AFTER discussing it with the kernel security team.  Do not Cc: both
+lists at once.  You may contact the linux-distros mailing list after a
+fix has been agreed on and you fully understand the requirements that
+doing so will impose on you and the kernel community.
+
+The different lists have different goals and the linux-distros rules do
+not contribute to actually fixing any potential security problems.
 
 CVE assignment
 --------------
 
-The security team does not normally assign CVEs, nor do we require them
-for reports or fixes, as this can needlessly complicate the process and
-may delay the bug handling. If a reporter wishes to have a CVE identifier
-assigned ahead of public disclosure, they will need to contact the private
-linux-distros list, described above. When such a CVE identifier is known
-before a patch is provided, it is desirable to mention it in the commit
-message if the reporter agrees.
+The security team does not assign CVEs, nor do we require them for
+reports or fixes, as this can needlessly complicate the process and may
+delay the bug handling.  If a reporter wishes to have a CVE identifier
+assigned, they should find one by themselves, for example by contacting
+MITRE directly.  However, under no circumstances will a patch inclusion
+be delayed to wait for a CVE identifier to arrive.
 
 Non-disclosure agreements
 -------------------------
index 19165eb..933c715 100644 (file)
@@ -49,7 +49,7 @@ The following keys are defined:
     privileged ISA, with the following known exceptions (more exceptions may be
     added, but only if it can be demonstrated that the user ABI is not broken):
 
-    * The :fence.i: instruction cannot be directly executed by userspace
+    * The ``fence.i`` instruction cannot be directly executed by userspace
       programs (it may still be executed in userspace via a
       kernel-controlled mechanism such as the vDSO).
 
index 03db555..f689198 100644 (file)
@@ -94,7 +94,7 @@ other HZ detail.  Thus the CFS scheduler has no notion of "timeslices" in the
 way the previous scheduler had, and has no heuristics whatsoever.  There is
 only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
 
-   /sys/kernel/debug/sched/min_granularity_ns
+   /sys/kernel/debug/sched/base_slice_ns
 
 which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
 "server" (i.e., good batching) workloads.  It defaults to a setting suitable
index d8aa64e..bf963d9 100644 (file)
@@ -187,7 +187,8 @@ WMI method BatteryeRawAnalytics()
 
 Returns a buffer usually containing 12 blocks of analytics data.
 Those blocks contain:
-- block number starting with 0 (u8)
+
+- a block number starting with 0 (u8)
 - 31 bytes of unknown data
 
 .. note::
index 3be1bdf..07e40f0 100644 (file)
@@ -1865,9 +1865,11 @@ M:       Martin PoviÅ¡er <povik+lin@cutebit.org>
 L:     asahi@lists.linux.dev
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
+F:     Documentation/devicetree/bindings/sound/adi,ssm3515.yaml
 F:     Documentation/devicetree/bindings/sound/apple,*
 F:     sound/soc/apple/*
 F:     sound/soc/codecs/cs42l83-i2c.c
+F:     sound/soc/codecs/ssm3515.c
 
 ARM/APPLE MACHINE SUPPORT
 M:     Hector Martin <marcan@marcan.st>
@@ -2337,7 +2339,7 @@ F:        drivers/phy/mediatek/
 ARM/MICROCHIP (ARM64) SoC support
 M:     Conor Dooley <conor@kernel.org>
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 T:     git https://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
@@ -2346,7 +2348,7 @@ F:        arch/arm64/boot/dts/microchip/
 ARM/Microchip (AT91) SoC support
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
 M:     Alexandre Belloni <alexandre.belloni@bootlin.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 W:     http://www.linux4sam.org
@@ -3248,7 +3250,7 @@ F:        include/uapi/linux/atm*
 
 ATMEL MACB ETHERNET DRIVER
 M:     Nicolas Ferre <nicolas.ferre@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 S:     Supported
 F:     drivers/net/ethernet/cadence/
 
@@ -3260,9 +3262,8 @@ F:        Documentation/devicetree/bindings/input/atmel,maxtouch.yaml
 F:     drivers/input/touchscreen/atmel_mxt_ts.c
 
 ATMEL WIRELESS DRIVER
-M:     Simon Kelley <simon@thekelleys.org.uk>
 L:     linux-wireless@vger.kernel.org
-S:     Maintained
+S:     Orphan
 W:     http://www.thekelleys.org.uk/atmel
 W:     http://atmelwlandriver.sourceforge.net/
 F:     drivers/net/wireless/atmel/atmel*
@@ -3392,7 +3393,7 @@ F:        drivers/media/radio/radio-aztech*
 B43 WIRELESS DRIVER
 L:     linux-wireless@vger.kernel.org
 L:     b43-dev@lists.infradead.org
-S:     Odd Fixes
+S:     Orphan
 W:     https://wireless.wiki.kernel.org/en/users/Drivers/b43
 F:     drivers/net/wireless/broadcom/b43/
 
@@ -4121,6 +4122,13 @@ F:       Documentation/devicetree/bindings/spi/brcm,bcm63xx-hsspi.yaml
 F:     drivers/spi/spi-bcm63xx-hsspi.c
 F:     drivers/spi/spi-bcmbca-hsspi.c
 
+BROADCOM BCM6348/BCM6358 SPI controller DRIVER
+M:     Jonas Gorski <jonas.gorski@gmail.com>
+L:     linux-spi@vger.kernel.org
+S:     Odd Fixes
+F:     Documentation/devicetree/bindings/spi/spi-bcm63xx.txt
+F:     drivers/spi/spi-bcm63xx.c
+
 BROADCOM ETHERNET PHY DRIVERS
 M:     Florian Fainelli <florian.fainelli@broadcom.com>
 R:     Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
@@ -4454,7 +4462,6 @@ CADENCE USB3 DRD IP DRIVER
 M:     Peter Chen <peter.chen@kernel.org>
 M:     Pawel Laszczak <pawell@cadence.com>
 R:     Roger Quadros <rogerq@kernel.org>
-R:     Aswath Govindraju <a-govindraju@ti.com>
 L:     linux-usb@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
@@ -4812,6 +4819,7 @@ F:        drivers/input/touchscreen/chipone_icn8505.c
 
 CHROME HARDWARE PLATFORM SUPPORT
 M:     Benson Leung <bleung@chromium.org>
+M:     Tzung-Bi Shih <tzungbi@kernel.org>
 L:     chrome-platform@lists.linux.dev
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git
@@ -5140,10 +5148,12 @@ S:      Maintained
 F:     include/linux/compiler_attributes.h
 
 COMPUTE EXPRESS LINK (CXL)
+M:     Davidlohr Bueso <dave@stgolabs.net>
+M:     Jonathan Cameron <jonathan.cameron@huawei.com>
+M:     Dave Jiang <dave.jiang@intel.com>
 M:     Alison Schofield <alison.schofield@intel.com>
 M:     Vishal Verma <vishal.l.verma@intel.com>
 M:     Ira Weiny <ira.weiny@intel.com>
-M:     Ben Widawsky <bwidawsk@kernel.org>
 M:     Dan Williams <dan.j.williams@intel.com>
 L:     linux-cxl@vger.kernel.org
 S:     Maintained
@@ -5452,8 +5462,7 @@ F:        Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
 F:     drivers/net/can/ctucanfd/
 
 CW1200 WLAN driver
-M:     Solomon Peachy <pizza@shaftnet.org>
-S:     Maintained
+S:     Orphan
 F:     drivers/net/wireless/st/cw1200/
 
 CX18 VIDEO4LINUX DRIVER
@@ -8672,8 +8681,11 @@ S:       Maintained
 F:     drivers/input/touchscreen/resistive-adc-touch.c
 
 GENERIC STRING LIBRARY
+M:     Kees Cook <keescook@chromium.org>
 R:     Andy Shevchenko <andy@kernel.org>
-S:     Maintained
+L:     linux-hardening@vger.kernel.org
+S:     Supported
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
 F:     include/linux/string.h
 F:     include/linux/string_choices.h
 F:     include/linux/string_helpers.h
@@ -8758,6 +8770,15 @@ S:       Supported
 F:     Documentation/networking/device_drivers/ethernet/google/gve.rst
 F:     drivers/net/ethernet/google
 
+GOOGLE FIRMWARE DRIVERS
+M:     Tzung-Bi Shih <tzungbi@kernel.org>
+R:     Brian Norris <briannorris@chromium.org>
+R:     Julius Werner <jwerner@chromium.org>
+L:     chrome-platform@lists.linux.dev
+S:     Maintained
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/chrome-platform/linux.git
+F:     drivers/firmware/google/
+
 GPD POCKET FAN DRIVER
 M:     Hans de Goede <hdegoede@redhat.com>
 L:     platform-driver-x86@vger.kernel.org
@@ -8801,6 +8822,7 @@ R:        Michael Walle <michael@walle.cc>
 S:     Maintained
 F:     drivers/gpio/gpio-regmap.c
 F:     include/linux/gpio/regmap.h
+K:     (devm_)?gpio_regmap_(un)?register
 
 GPIO SUBSYSTEM
 M:     Linus Walleij <linus.walleij@linaro.org>
@@ -9294,7 +9316,7 @@ F:        drivers/crypto/hisilicon/hpre/hpre_crypto.c
 F:     drivers/crypto/hisilicon/hpre/hpre_main.c
 
 HISILICON HNS3 PMU DRIVER
-M:     Guangbin Huang <huangguangbin2@huawei.com>
+M:     Jijie Shao <shaojijie@huawei.com>
 S:     Supported
 F:     Documentation/admin-guide/perf/hns3-pmu.rst
 F:     drivers/perf/hisilicon/hns3_pmu.c
@@ -9332,7 +9354,7 @@ F:        Documentation/devicetree/bindings/net/hisilicon*.txt
 F:     drivers/net/ethernet/hisilicon/
 
 HISILICON PMU DRIVER
-M:     Shaokun Zhang <zhangshaokun@hisilicon.com>
+M:     Yicong Yang <yangyicong@hisilicon.com>
 M:     Jonathan Cameron <jonathan.cameron@huawei.com>
 S:     Supported
 W:     http://www.hisilicon.com
@@ -9364,7 +9386,6 @@ F:        drivers/crypto/hisilicon/sgl.c
 F:     include/linux/hisi_acc_qm.h
 
 HISILICON ROCE DRIVER
-M:     Haoyue Xu <xuhaoyue1@hisilicon.com>
 M:     Junxian Huang <huangjunxian6@hisilicon.com>
 L:     linux-rdma@vger.kernel.org
 S:     Maintained
@@ -9649,6 +9670,7 @@ F:        tools/hv/
 
 HYPERBUS SUPPORT
 M:     Vignesh Raghavendra <vigneshr@ti.com>
+R:     Tudor Ambarus <tudor.ambarus@linaro.org>
 L:     linux-mtd@lists.infradead.org
 S:     Supported
 Q:     http://patchwork.ozlabs.org/project/linux-mtd/list/
@@ -12468,6 +12490,7 @@ F:      net/mctp/
 
 MAPLE TREE
 M:     Liam R. Howlett <Liam.Howlett@oracle.com>
+L:     maple-tree@lists.infradead.org
 L:     linux-mm@kvack.org
 S:     Supported
 F:     Documentation/core-api/maple_tree.rst
@@ -12579,18 +12602,14 @@ F:    Documentation/devicetree/bindings/net/marvell,pp2.yaml
 F:     drivers/net/ethernet/marvell/mvpp2/
 
 MARVELL MWIFIEX WIRELESS DRIVER
-M:     Amitkumar Karwar <amitkarwar@gmail.com>
-M:     Ganapathi Bhat <ganapathi017@gmail.com>
-M:     Sharvari Harisangam <sharvari.harisangam@nxp.com>
-M:     Xinming Hu <huxinming820@gmail.com>
+M:     Brian Norris <briannorris@chromium.org>
 L:     linux-wireless@vger.kernel.org
-S:     Maintained
+S:     Odd Fixes
 F:     drivers/net/wireless/marvell/mwifiex/
 
 MARVELL MWL8K WIRELESS DRIVER
-M:     Lennert Buytenhek <buytenh@wantstofly.org>
 L:     linux-wireless@vger.kernel.org
-S:     Odd Fixes
+S:     Orphan
 F:     drivers/net/wireless/marvell/mwl8k.c
 
 MARVELL NAND CONTROLLER DRIVER
@@ -13778,7 +13797,7 @@ F:      Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
 F:     drivers/spi/spi-at91-usart.c
 
 MICROCHIP AUDIO ASOC DRIVERS
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/sound/atmel*
@@ -13801,7 +13820,7 @@ S:      Maintained
 F:     drivers/crypto/atmel-ecc.*
 
 MICROCHIP EIC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/interrupt-controller/microchip,sama7g5-eic.yaml
@@ -13874,7 +13893,7 @@ F:      drivers/video/fbdev/atmel_lcdfb.c
 F:     include/video/atmel_lcdc.h
 
 MICROCHIP MCP16502 PMIC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
@@ -13901,7 +13920,7 @@ F:      Documentation/devicetree/bindings/mtd/atmel-nand.txt
 F:     drivers/mtd/nand/raw/atmel/*
 
 MICROCHIP OTPC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml
@@ -13940,7 +13959,7 @@ F:      Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
 F:     drivers/fpga/microchip-spi.c
 
 MICROCHIP PWM DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-pwm@vger.kernel.org
 S:     Supported
@@ -13956,7 +13975,7 @@ F:      drivers/iio/adc/at91-sama5d2_adc.c
 F:     include/dt-bindings/iio/adc/at91-sama5d2_adc.h
 
 MICROCHIP SAMA5D2-COMPATIBLE SHUTDOWN CONTROLLER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 S:     Supported
 F:     Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml
 F:     drivers/power/reset/at91-sama5d2_shdwc.c
@@ -13968,12 +13987,12 @@ T:    git https://git.kernel.org/pub/scm/linux/kernel/git/conor/linux.git/
 F:     drivers/soc/microchip/
 
 MICROCHIP SPI DRIVER
-M:     Tudor Ambarus <tudor.ambarus@linaro.org>
+M:     Ryan Wanner <ryan.wanner@microchip.com>
 S:     Supported
 F:     drivers/spi/spi-atmel.*
 
 MICROCHIP SSC DRIVER
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -14002,7 +14021,7 @@ F:      drivers/usb/gadget/udc/atmel_usba_udc.*
 
 MICROCHIP WILC1000 WIFI DRIVER
 M:     Ajay Singh <ajay.kathat@microchip.com>
-M:     Claudiu Beznea <claudiu.beznea@microchip.com>
+M:     Claudiu Beznea <claudiu.beznea@tuxon.dev>
 L:     linux-wireless@vger.kernel.org
 S:     Supported
 F:     drivers/net/wireless/microchip/wilc1000/
@@ -14794,6 +14813,16 @@ F:     net/netfilter/xt_CONNSECMARK.c
 F:     net/netfilter/xt_SECMARK.c
 F:     net/netlabel/
 
+NETWORKING [MACSEC]
+M:     Sabrina Dubroca <sd@queasysnail.net>
+L:     netdev@vger.kernel.org
+S:     Maintained
+F:     drivers/net/macsec.c
+F:     include/net/macsec.h
+F:     include/uapi/linux/if_macsec.h
+K:     macsec
+K:     \bmdo_
+
 NETWORKING [MPTCP]
 M:     Matthieu Baerts <matthieu.baerts@tessares.net>
 M:     Mat Martineau <martineau@kernel.org>
@@ -16285,6 +16314,7 @@ F:      drivers/pci/controller/dwc/pci-exynos.c
 PCI DRIVER FOR SYNOPSYS DESIGNWARE
 M:     Jingoo Han <jingoohan1@gmail.com>
 M:     Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+M:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -17037,6 +17067,7 @@ F:      drivers/net/ppp/pptp.c
 PRESSURE STALL INFORMATION (PSI)
 M:     Johannes Weiner <hannes@cmpxchg.org>
 M:     Suren Baghdasaryan <surenb@google.com>
+R:     Peter Zijlstra <peterz@infradead.org>
 S:     Maintained
 F:     include/linux/psi*
 F:     kernel/sched/psi.c
@@ -17436,6 +17467,7 @@ F:      drivers/media/tuners/qt1010*
 
 QUALCOMM ATH12K WIRELESS DRIVER
 M:     Kalle Valo <kvalo@kernel.org>
+M:     Jeff Johnson <quic_jjohnson@quicinc.com>
 L:     ath12k@lists.infradead.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
@@ -17443,6 +17475,7 @@ F:      drivers/net/wireless/ath/ath12k/
 
 QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
 M:     Kalle Valo <kvalo@kernel.org>
+M:     Jeff Johnson <quic_jjohnson@quicinc.com>
 L:     ath10k@lists.infradead.org
 S:     Supported
 W:     https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
@@ -17452,6 +17485,7 @@ F:      drivers/net/wireless/ath/ath10k/
 
 QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
 M:     Kalle Valo <kvalo@kernel.org>
+M:     Jeff Johnson <quic_jjohnson@quicinc.com>
 L:     ath11k@lists.infradead.org
 S:     Supported
 W:     https://wireless.wiki.kernel.org/en/users/Drivers/ath11k
@@ -17543,6 +17577,7 @@ QUALCOMM ETHQOS ETHERNET DRIVER
 M:     Vinod Koul <vkoul@kernel.org>
 R:     Bhupesh Sharma <bhupesh.sharma@linaro.org>
 L:     netdev@vger.kernel.org
+L:     linux-arm-msm@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/qcom,ethqos.yaml
 F:     drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -17971,7 +18006,7 @@ T:      git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.g
 F:     drivers/net/wireless/realtek/rtlwifi/
 
 REALTEK WIRELESS DRIVER (rtw88)
-M:     Yan-Hsuan Chuang <tony0620emma@gmail.com>
+M:     Ping-Ke Shih <pkshih@realtek.com>
 L:     linux-wireless@vger.kernel.org
 S:     Maintained
 F:     drivers/net/wireless/realtek/rtw88/
@@ -18496,17 +18531,14 @@ RTL8180 WIRELESS DRIVER
 L:     linux-wireless@vger.kernel.org
 S:     Orphan
 W:     https://wireless.wiki.kernel.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:     drivers/net/wireless/realtek/rtl818x/rtl8180/
 
 RTL8187 WIRELESS DRIVER
-M:     Herton Ronaldo Krzesinski <herton@canonical.com>
-M:     Hin-Tak Leung <htl10@users.sourceforge.net>
+M:     Hin-Tak Leung <hintak.leung@gmail.com>
 M:     Larry Finger <Larry.Finger@lwfinger.net>
 L:     linux-wireless@vger.kernel.org
 S:     Maintained
 W:     https://wireless.wiki.kernel.org/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
 F:     drivers/net/wireless/realtek/rtl818x/rtl8187/
 
 RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
@@ -18583,7 +18615,7 @@ L:      linux-s390@vger.kernel.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux.git
 F:     Documentation/driver-api/s390-drivers.rst
-F:     Documentation/s390/
+F:     Documentation/arch/s390/
 F:     arch/s390/
 F:     drivers/s390/
 F:     drivers/watchdog/diag288_wdt.c
@@ -18644,7 +18676,7 @@ M:      Niklas Schnelle <schnelle@linux.ibm.com>
 M:     Gerald Schaefer <gerald.schaefer@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
-F:     Documentation/s390/pci.rst
+F:     Documentation/arch/s390/pci.rst
 F:     arch/s390/pci/
 F:     drivers/pci/hotplug/s390_pci_hpc.c
 
@@ -18661,7 +18693,7 @@ M:      Halil Pasic <pasic@linux.ibm.com>
 M:     Jason Herne <jjherne@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
-F:     Documentation/s390/vfio-ap*
+F:     Documentation/arch/s390/vfio-ap*
 F:     drivers/s390/crypto/vfio_ap*
 
 S390 VFIO-CCW DRIVER
@@ -18671,7 +18703,7 @@ R:      Halil Pasic <pasic@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 L:     kvm@vger.kernel.org
 S:     Supported
-F:     Documentation/s390/vfio-ccw.rst
+F:     Documentation/arch/s390/vfio-ccw.rst
 F:     drivers/s390/cio/vfio_ccw*
 F:     include/uapi/linux/vfio_ccw.h
 
@@ -19213,13 +19245,6 @@ F:     Documentation/devicetree/bindings/serial/serial.yaml
 F:     drivers/tty/serdev/
 F:     include/linux/serdev.h
 
-SERIAL DRIVERS
-M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:     linux-serial@vger.kernel.org
-S:     Maintained
-F:     Documentation/devicetree/bindings/serial/
-F:     drivers/tty/serial/
-
 SERIAL IR RECEIVER
 M:     Sean Young <sean@mess.org>
 L:     linux-media@vger.kernel.org
@@ -19271,7 +19296,6 @@ F:      drivers/misc/sgi-gru/
 SGI XP/XPC/XPNET DRIVER
 M:     Robin Holt <robinmholt@gmail.com>
 M:     Steve Wahl <steve.wahl@hpe.com>
-R:     Mike Travis <mike.travis@hpe.com>
 S:     Maintained
 F:     drivers/misc/sgi-xp/
 
@@ -20390,7 +20414,6 @@ F:      drivers/pwm/pwm-stm32*
 F:     include/linux/*/stm32-*tim*
 
 STMMAC ETHERNET DRIVER
-M:     Giuseppe Cavallaro <peppe.cavallaro@st.com>
 M:     Alexandre Torgue <alexandre.torgue@foss.st.com>
 M:     Jose Abreu <joabreu@synopsys.com>
 L:     netdev@vger.kernel.org
@@ -21049,6 +21072,39 @@ S:     Maintained
 F:     Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml
 F:     sound/soc/ti/
 
+TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
+M:     Shenghao Ding <shenghao-ding@ti.com>
+M:     Kevin Lu <kevin-lu@ti.com>
+M:     Baojun Xu <x1077012@ti.com>
+L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
+S:     Maintained
+F:     Documentation/devicetree/bindings/sound/tas2552.txt
+F:     Documentation/devicetree/bindings/sound/tas2562.yaml
+F:     Documentation/devicetree/bindings/sound/tas2770.yaml
+F:     Documentation/devicetree/bindings/sound/tas27xx.yaml
+F:     Documentation/devicetree/bindings/sound/ti,pcm1681.txt
+F:     Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml
+F:     Documentation/devicetree/bindings/sound/ti,tlv320*.yaml
+F:     Documentation/devicetree/bindings/sound/tlv320adcx140.yaml
+F:     Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
+F:     Documentation/devicetree/bindings/sound/tpa6130a2.txt
+F:     include/sound/tas2*.h
+F:     include/sound/tlv320*.h
+F:     include/sound/tpa6130a2-plat.h
+F:     sound/pci/hda/tas2781_hda_i2c.c
+F:     sound/soc/codecs/pcm1681.c
+F:     sound/soc/codecs/pcm1789*.*
+F:     sound/soc/codecs/pcm179x*.*
+F:     sound/soc/codecs/pcm186x*.*
+F:     sound/soc/codecs/pcm3008.*
+F:     sound/soc/codecs/pcm3060*.*
+F:     sound/soc/codecs/pcm3168a*.*
+F:     sound/soc/codecs/pcm5102a.c
+F:     sound/soc/codecs/pcm512x*.*
+F:     sound/soc/codecs/tas2*.*
+F:     sound/soc/codecs/tlv320*.*
+F:     sound/soc/codecs/tpa6130a2.*
+
 TEXAS INSTRUMENTS DMA DRIVERS
 M:     Peter Ujfalusi <peter.ujfalusi@gmail.com>
 L:     dmaengine@vger.kernel.org
@@ -21625,14 +21681,16 @@ W:    https://github.com/srcres258/linux-doc
 T:     git git://github.com/srcres258/linux-doc.git doc-zh-tw
 F:     Documentation/translations/zh_TW/
 
-TTY LAYER
+TTY LAYER AND SERIAL DRIVERS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 M:     Jiri Slaby <jirislaby@kernel.org>
+L:     linux-kernel@vger.kernel.org
+L:     linux-serial@vger.kernel.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
+F:     Documentation/devicetree/bindings/serial/
 F:     Documentation/driver-api/serial/
 F:     drivers/tty/
-F:     drivers/tty/serial/serial_core.c
 F:     include/linux/selection.h
 F:     include/linux/serial.h
 F:     include/linux/serial_core.h
@@ -21661,11 +21719,14 @@ S:    Orphan
 F:     drivers/net/ethernet/dec/tulip/
 
 TUN/TAP driver
-M:     Maxim Krasnyansky <maxk@qti.qualcomm.com>
+M:     Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+M:     Jason Wang <jasowang@redhat.com>
 S:     Maintained
 W:     http://vtun.sourceforge.net/tun
 F:     Documentation/networking/tuntap.rst
 F:     arch/um/os-Linux/drivers/
+F:     drivers/net/tap.c
+F:     drivers/net/tun.c
 
 TURBOCHANNEL SUBSYSTEM
 M:     "Maciej W. Rozycki" <macro@orcam.me.uk>
@@ -21888,9 +21949,8 @@ S:      Maintained
 F:     drivers/usb/misc/apple-mfi-fastcharge.c
 
 USB AR5523 WIRELESS DRIVER
-M:     Pontus Fuchs <pontus.fuchs@gmail.com>
 L:     linux-wireless@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/net/wireless/ath/ar5523/
 
 USB ATTACHED SCSI
@@ -22167,9 +22227,8 @@ F:      drivers/usb/gadget/legacy/webcam.c
 F:     include/uapi/linux/usb/g_uvc.h
 
 USB WIRELESS RNDIS DRIVER (rndis_wlan)
-M:     Jussi Kivilinna <jussi.kivilinna@iki.fi>
 L:     linux-wireless@vger.kernel.org
-S:     Maintained
+S:     Orphan
 F:     drivers/net/wireless/legacy/rndis_wlan.c
 
 USB XHCI DRIVER
@@ -22458,7 +22517,6 @@ L:      virtualization@lists.linux-foundation.org
 S:     Maintained
 F:     drivers/block/virtio_blk.c
 F:     drivers/scsi/virtio_scsi.c
-F:     drivers/vhost/scsi.c
 F:     include/uapi/linux/virtio_blk.h
 F:     include/uapi/linux/virtio_scsi.h
 
@@ -22557,6 +22615,16 @@ F:     include/linux/vhost_iotlb.h
 F:     include/uapi/linux/vhost.h
 F:     kernel/vhost_task.c
 
+VIRTIO HOST (VHOST-SCSI)
+M:     "Michael S. Tsirkin" <mst@redhat.com>
+M:     Jason Wang <jasowang@redhat.com>
+M:     Mike Christie <michael.christie@oracle.com>
+R:     Paolo Bonzini <pbonzini@redhat.com>
+R:     Stefan Hajnoczi <stefanha@redhat.com>
+L:     virtualization@lists.linux-foundation.org
+S:     Maintained
+F:     drivers/vhost/scsi.c
+
 VIRTIO I2C DRIVER
 M:     Conghui Chen <conghui.chen@intel.com>
 M:     Viresh Kumar <viresh.kumar@linaro.org>
@@ -22944,7 +23012,7 @@ F:      drivers/input/misc/wistron_btns.c
 
 WL3501 WIRELESS PCMCIA CARD DRIVER
 L:     linux-wireless@vger.kernel.org
-S:     Odd fixes
+S:     Orphan
 F:     drivers/net/wireless/legacy/wl3501*
 
 WMI BINARY MOF DRIVER
@@ -23124,7 +23192,8 @@ F:      arch/x86/platform
 
 X86 PLATFORM UV HPE SUPERDOME FLEX
 M:     Steve Wahl <steve.wahl@hpe.com>
-R:     Mike Travis <mike.travis@hpe.com>
+R:     Justin Ernst <justin.ernst@hpe.com>
+R:     Kyle Meyer <kyle.meyer@hpe.com>
 R:     Dimitri Sivanich <dimitri.sivanich@hpe.com>
 R:     Russ Anderson <russ.anderson@hpe.com>
 S:     Supported
@@ -23515,11 +23584,8 @@ S:     Maintained
 F:     mm/zbud.c
 
 ZD1211RW WIRELESS DRIVER
-M:     Ulrich Kunitz <kune@deine-taler.de>
 L:     linux-wireless@vger.kernel.org
-L:     zd1211-devs@lists.sourceforge.net (subscribers-only)
-S:     Maintained
-W:     http://zd1211.ath.cx/wiki/DriverRewrite
+S:     Orphan
 F:     drivers/net/wireless/zydas/zd1211rw/
 
 ZD1301 MEDIA DRIVER
index 47690c2..2fdd8b4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 5
 SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
@@ -555,11 +555,23 @@ LINUXINCLUDE    := \
                $(USERINCLUDE)
 
 KBUILD_AFLAGS   := -D__ASSEMBLY__ -fno-PIE
-KBUILD_CFLAGS   := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \
-                  -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \
-                  -Werror=implicit-function-declaration -Werror=implicit-int \
-                  -Werror=return-type -Wno-format-security -funsigned-char \
-                  -std=gnu11
+
+KBUILD_CFLAGS :=
+KBUILD_CFLAGS += -std=gnu11
+KBUILD_CFLAGS += -fshort-wchar
+KBUILD_CFLAGS += -funsigned-char
+KBUILD_CFLAGS += -fno-common
+KBUILD_CFLAGS += -fno-PIE
+KBUILD_CFLAGS += -fno-strict-aliasing
+KBUILD_CFLAGS += -Wall
+KBUILD_CFLAGS += -Wundef
+KBUILD_CFLAGS += -Werror=implicit-function-declaration
+KBUILD_CFLAGS += -Werror=implicit-int
+KBUILD_CFLAGS += -Werror=return-type
+KBUILD_CFLAGS += -Werror=strict-prototypes
+KBUILD_CFLAGS += -Wno-format-security
+KBUILD_CFLAGS += -Wno-trigraphs
+
 KBUILD_CPPFLAGS := -D__KERNEL__
 KBUILD_RUSTFLAGS := $(rust_common_flags) \
                    --target=$(objtree)/scripts/target.json \
index aff2746..63c5d6a 100644 (file)
@@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS
 config HOTPLUG_SMT
        bool
 
+config SMT_NUM_THREADS_DYNAMIC
+       bool
+
 # Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL
 config HOTPLUG_CORE_SYNC
        bool
index 714abe4..55bb1c0 100644 (file)
@@ -47,12 +47,6 @@ unsigned long __get_wchan(struct task_struct *p);
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#ifndef CONFIG_SMP
-/* Nothing to prefetch. */
-#define spin_lock_prefetch(lock)       do { } while (0)
-#endif
 
 extern inline void prefetch(const void *ptr)  
 { 
@@ -64,11 +58,4 @@ extern inline void prefetchw(const void *ptr)
        __builtin_prefetch(ptr, 1, 3);
 }
 
-#ifdef CONFIG_SMP
-extern inline void spin_lock_prefetch(const void *ptr)  
-{
-       __builtin_prefetch(ptr, 1, 3);
-}
-#endif
-
 #endif /* __ASM_ALPHA_PROCESSOR_H */
index d98701e..5db88b6 100644 (file)
@@ -97,7 +97,7 @@ struct osf_dirent {
        unsigned int d_ino;
        unsigned short d_reclen;
        unsigned short d_namlen;
-       char d_name[1];
+       char d_name[];
 };
 
 struct osf_dirent_callback {
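
The ``d_name`` change above is the usual conversion from a one-element
array to a C99 flexible array member. A userspace sketch of the allocation
pattern such a member implies (names hypothetical)::

    #include <stdlib.h>
    #include <string.h>

    struct rec {
            unsigned short len;
            char name[];            /* flexible array member */
    };

    static struct rec *rec_new(const char *s)
    {
            size_t n = strlen(s) + 1;
            struct rec *r = malloc(sizeof(*r) + n); /* room for the name */

            if (r) {
                    r->len = n - 1;
                    memcpy(r->name, s, n);
            }
            return r;
    }
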
index b650ff1..3d74735 100644 (file)
@@ -385,8 +385,7 @@ setup_memory(void *kernel_end)
 #endif /* CONFIG_BLK_DEV_INITRD */
 }
 
-int __init
-page_is_ram(unsigned long pfn)
+int page_is_ram(unsigned long pfn)
 {
        struct memclust_struct * cluster;
        struct memdesc_struct * memdesc;
index 1f13995..ad37569 100644 (file)
 559    common  futex_waitv                     sys_futex_waitv
 560    common  set_mempolicy_home_node         sys_ni_syscall
 561    common  cachestat                       sys_cachestat
+562    common  fchmodat2                       sys_fchmodat2
index 5b52d75..d9927d3 100644 (file)
                valid-mask = <0x003fffff>;
        };
 
-       pci: pciv3@62000000 {
+       pci: pci@62000000 {
                compatible = "arm,integrator-ap-pci", "v3,v360epc-pci";
                device_type = "pci";
                #interrupt-cells = <1>;
index 8b53997..73d570a 100644 (file)
                                status = "disabled";
 
                                uart4: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <13 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart5: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        atmel,usart-mode = <AT91_USART_MODE_SERIAL>;
                                        interrupts = <14 IRQ_TYPE_LEVEL_HIGH 7>;
                                status = "disabled";
 
                                uart11: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <32 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart12: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <33 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart6: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <9 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart7: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <10 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart8: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <11 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart0: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <5 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart1: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <6 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart2: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <7 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart3: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <8 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart9: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <15 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
                                status = "disabled";
 
                                uart10: serial@200 {
-                                       compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+                                       compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
                                        reg = <0x200 0x200>;
                                        interrupts = <16 IRQ_TYPE_LEVEL_HIGH 7>;
                                        dmas = <&dma0
index bb240e6..088bcc3 100644 (file)
                        };
 
                        watchdog: watchdog@90060000 {
-                               compatible = "arm,amba-primecell";
+                               compatible = "arm,primecell";
                                reg = <0x90060000 0x1000>;
                                interrupts = <3>;
                        };
index 103e731..1a00d29 100644 (file)
        status = "okay";
 };
 
+&cpu0 {
+       /* CPU rated to 800 MHz, not the default 1.2 GHz. */
+       operating-points = <
+               /* kHz   uV */
+               166666  850000
+               400000  900000
+               800000  1050000
+       >;
+};
+
 &ecspi1 {
        pinctrl-names = "default";
        pinctrl-0 = <&pinctrl_ecspi1>;
index 1a599c2..1ca4d21 100644 (file)
                pinctrl-0 = <&pinctrl_rtc_int>;
                reg = <0x68>;
                interrupt-parent = <&gpio7>;
-               interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+               interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
                status = "disabled";
        };
 };
index 2873369..3659fd5 100644 (file)
                                reg = <0x020ca000 0x1000>;
                                interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
                                clocks = <&clks IMX6SLL_CLK_USBPHY2>;
-                               phy-reg_3p0-supply = <&reg_3p0>;
+                               phy-3p0-supply = <&reg_3p0>;
                                fsl,anatop = <&anatop>;
                        };
 
index 3a43086..a05069d 100644 (file)
                                                        reg = <0>;
 
                                                        ldb_from_lcdif1: endpoint {
-                                                               remote-endpoint = <&lcdif1_to_ldb>;
                                                        };
                                                };
 
                                         <&clks IMX6SX_CLK_USDHC1>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-start-tap = <20>;
+                               fsl,tuning-step = <2>;
                                status = "disabled";
                        };
 
                                         <&clks IMX6SX_CLK_USDHC2>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-start-tap = <20>;
+                               fsl,tuning-step = <2>;
                                status = "disabled";
                        };
 
                                         <&clks IMX6SX_CLK_USDHC3>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-start-tap = <20>;
+                               fsl,tuning-step = <2>;
                                status = "disabled";
                        };
 
                                        power-domains = <&pd_disp>;
                                        status = "disabled";
 
-                                       ports {
-                                               port {
-                                                       lcdif1_to_ldb: endpoint {
-                                                               remote-endpoint = <&ldb_from_lcdif1>;
-                                                       };
+                                       port {
+                                               lcdif1_to_ldb: endpoint {
                                                };
                                        };
                                };
index 54026c2..6ffb428 100644 (file)
                                        <&clks IMX7D_USDHC1_ROOT_CLK>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-step = <2>;
+                               fsl,tuning-start-tap = <20>;
                                status = "disabled";
                        };
 
                                        <&clks IMX7D_USDHC2_ROOT_CLK>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-step = <2>;
+                               fsl,tuning-start-tap = <20>;
                                status = "disabled";
                        };
 
                                        <&clks IMX7D_USDHC3_ROOT_CLK>;
                                clock-names = "ipg", "ahb", "per";
                                bus-width = <4>;
+                               fsl,tuning-step = <2>;
+                               fsl,tuning-start-tap = <20>;
                                status = "disabled";
                        };
 
index b958607..96451c8 100644 (file)
                        /* MDIO */
                        AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLUP | SLEWCTRL_FAST, MUX_MODE0)
                        AM33XX_PADCONF(AM335X_PIN_MDC, PIN_OUTPUT_PULLUP, MUX_MODE0)
+                       /* Added to support GPIO-controlled PHY reset */
+                       AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_OUTPUT_PULLUP, MUX_MODE7)
                >;
        };
 
                        /* MDIO reset value */
                        AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLDOWN, MUX_MODE7)
                        AM33XX_PADCONF(AM335X_PIN_MDC, PIN_INPUT_PULLDOWN, MUX_MODE7)
+                       /* Added to support GPIO-controlled PHY reset */
+                       AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE7)
                >;
        };
 
        baseboard_eeprom: baseboard_eeprom@50 {
                compatible = "atmel,24c256";
                reg = <0x50>;
+               vcc-supply = <&ldo4_reg>;
 
                #address-cells = <1>;
                #size-cells = <1>;
 
        ethphy0: ethernet-phy@0 {
                reg = <0>;
+               /* Support GPIO reset on revision C3 boards */
+               reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+               reset-assert-us = <300>;
+               reset-deassert-us = <6500>;
        };
 };
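
The new reset-gpios handle plus reset-assert-us/reset-deassert-us give phylib the timing it needs to hard-reset the PHY on revision C3 boards. Roughly, the core ends up doing the equivalent of this sketch (illustrative only; upstream performs it internally when the PHY node carries these properties):

        #include <linux/delay.h>
        #include <linux/gpio/consumer.h>

        static void example_phy_hw_reset(struct gpio_desc *reset_gpio)
        {
                gpiod_set_value_cansleep(reset_gpio, 1);  /* assert; GPIO_ACTIVE_LOW supplies the wire polarity */
                fsleep(300);                              /* reset-assert-us */
                gpiod_set_value_cansleep(reset_gpio, 0);  /* deassert */
                fsleep(6500);                             /* reset-deassert-us */
        }
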
 
index bfbaa2d..d1c5508 100644 (file)
@@ -197,7 +197,7 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=y
 CONFIG_FSCACHE=y
index 821d966..05ea717 100644 (file)
@@ -232,7 +232,7 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_XFS_FS=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index b0f0baa..53b1d41 100644 (file)
@@ -327,7 +327,7 @@ CONFIG_PWM_SAMSUNG=y
 CONFIG_PHY_EXYNOS5250_SATA=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index 87c4893..c9f4594 100644 (file)
@@ -94,7 +94,7 @@ CONFIG_LEDS_CLASS=y
 CONFIG_LEDS_TRIGGERS=y
 CONFIG_LEDS_TRIGGER_TIMER=y
 CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_MSDOS_FS=m
index 0570669..0a90583 100644 (file)
@@ -442,7 +442,7 @@ CONFIG_EXT3_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index d7a0bca..1cb1456 100644 (file)
@@ -207,7 +207,7 @@ CONFIG_RESET_TI_SYSCON=m
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
index d7df048..e2b0ff0 100644 (file)
@@ -162,7 +162,7 @@ CONFIG_MAX517=y
 CONFIG_PWM=y
 CONFIG_PWM_LPC32XX=y
 CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index 385ad0f..7d42845 100644 (file)
@@ -81,7 +81,7 @@ CONFIG_SOC_BRCMSTB=y
 CONFIG_MEMORY=y
 # CONFIG_ARM_PMU is not set
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
index f0800f8..c7b2550 100644 (file)
@@ -1226,7 +1226,7 @@ CONFIG_COUNTER=m
 CONFIG_STM32_TIMER_CNT=m
 CONFIG_STM32_LPTIMER_CNT=m
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
index 53dd071..7c2cc7a 100644 (file)
@@ -188,7 +188,7 @@ CONFIG_RTC_DRV_OMAP=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_DNOTIFY is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_MSDOS_FS=y
index 9bd36dd..b685018 100644 (file)
@@ -678,7 +678,7 @@ CONFIG_EXT4_FS_SECURITY=y
 CONFIG_FANOTIFY=y
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index b46e393..b0c3355 100644 (file)
@@ -589,7 +589,7 @@ CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_FSCACHE=y
index 65a3fde..b1d12a2 100644 (file)
@@ -79,7 +79,7 @@ CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_PCF8583=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_MSDOS_FS=m
index 4c1e480..72df854 100644 (file)
@@ -103,7 +103,7 @@ CONFIG_PHY_SAMSUNG_USB2=m
 CONFIG_PHY_S5PV210_USB2=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index 70739e0..d6dfae1 100644 (file)
@@ -136,7 +136,7 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT3_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_NTFS_RW=y
index bfde0c8..c8128a6 100644 (file)
@@ -85,7 +85,7 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
index a96ed5c..97ea2e9 100644 (file)
@@ -68,7 +68,7 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
index 3e2c2ab..a7a3413 100644 (file)
@@ -54,7 +54,7 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
index dfeed44..fe4326d 100644 (file)
@@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task,
        if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
                return task_thread_info(task)->abi_syscall;
 
+       if (task_thread_info(task)->abi_syscall == -1)
+               return -1;
+
        return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
 }
 
index bcc4c9e..5c31e9d 100644 (file)
@@ -90,6 +90,7 @@ slow_work_pending:
        cmp     r0, #0
        beq     no_work_pending
        movlt   scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
+       str     scno, [tsk, #TI_ABI_SYSCALL]    @ make sure tracers see update
        ldmia   sp, {r0 - r6}                   @ have to reload r0 - r6
        b       local_restart                   @ ... and off we go
 ENDPROC(ret_fast_syscall)
index 054e919..dc0fb7a 100644 (file)
@@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
        hw->address &= ~alignment_mask;
        hw->ctrl.len <<= offset;
 
-       if (is_default_overflow_handler(bp)) {
+       if (uses_default_overflow_handler(bp)) {
                /*
                 * Mismatch breakpoints are required for single-stepping
                 * breakpoints.
@@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
                 * Otherwise, insert a temporary mismatch breakpoint so that
                 * we can single-step over the watchpoint trigger.
                 */
-               if (!is_default_overflow_handler(wp))
+               if (!uses_default_overflow_handler(wp))
                        continue;
 step:
                enable_single_step(wp, instruction_pointer(regs));
@@ -811,7 +811,7 @@ step:
                info->trigger = addr;
                pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
                perf_bp_event(wp, regs);
-               if (is_default_overflow_handler(wp))
+               if (uses_default_overflow_handler(wp))
                        enable_single_step(wp, instruction_pointer(regs));
        }
 
@@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
                        info->trigger = addr;
                        pr_debug("breakpoint fired: address = 0x%x\n", addr);
                        perf_bp_event(bp, regs);
-                       if (is_default_overflow_handler(bp))
+                       if (uses_default_overflow_handler(bp))
                                enable_single_step(bp, addr);
                        goto unlock;
                }
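
The hunks above switch the ARM hw_breakpoint code from is_default_overflow_handler() to uses_default_overflow_handler(), which also returns true when a wrapper (e.g. BPF) has been layered over the default handler, so single-step fixups still happen for such events. The helper's shape is approximately this (a sketch of the include/linux/perf_event.h definition, not verbatim):

        static inline bool uses_default_overflow_handler(struct perf_event *event)
        {
                if (likely(is_default_overflow_handler(event)))
                        return true;

                /* Handler was replaced; check the one it wrapped. */
                return __is_default_overflow_handler(event->orig_overflow_handler);
        }
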
index 2d8e251..fef32d7 100644 (file)
@@ -783,8 +783,9 @@ long arch_ptrace(struct task_struct *child, long request,
                        break;
 
                case PTRACE_SET_SYSCALL:
-                       task_thread_info(child)->abi_syscall = data &
-                                                       __NR_SYSCALL_MASK;
+                       if (data != -1)
+                               data &= __NR_SYSCALL_MASK;
+                       task_thread_info(child)->abi_syscall = data;
                        ret = 0;
                        break;
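
Together with the syscall_get_nr() and entry-assembly hunks above, this lets a tracer store -1 unmasked, the conventional "skip this syscall" value. A userspace sketch (PTRACE_SET_SYSCALL is ARM-specific; the fallback define mirrors the ARM uapi header):

        #include <sys/ptrace.h>
        #include <sys/types.h>

        #ifndef PTRACE_SET_SYSCALL
        #define PTRACE_SET_SYSCALL 23   /* arch/arm uapi value */
        #endif

        /* Call with the tracee stopped at syscall entry. */
        static long skip_current_syscall(pid_t pid)
        {
                return ptrace(PTRACE_SET_SYSCALL, pid, 0, -1);
        }
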
 
index 20e4cab..623167f 100644 (file)
@@ -105,5 +105,4 @@ void sharpsl_pm_led(int val);
 #define MAX1111_ACIN_VOLT   6u
 int sharpsl_pm_pxa_read_max1111(int channel);
 
-void corgi_lcd_limit_intensity(int limit);
 #endif
index 1c021ce..8bc4ea5 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/apm-emulation.h>
+#include <linux/spi/corgi_lcd.h>
 
 #include <asm/irq.h>
 #include <asm/mach-types.h>
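
With the stray extern dropped from the machine header above, this file now gets the corgi_lcd_limit_intensity() prototype from <linux/spi/corgi_lcd.h>, the header owned by the backlight driver that defines the function.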
index 8ba450a..61ad965 100644 (file)
@@ -8,8 +8,8 @@
  */
 
 #include <linux/io.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include "common.h"
 
 /* register offsets */
index 8ebed8a..c572d6c 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
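
fchmodat2 is fchmodat with a working flags argument. Until libc grows a wrapper it can be called by number; a minimal sketch (the path and mode are made up for illustration):

        #include <fcntl.h>
        #include <unistd.h>
        #include <sys/syscall.h>

        #ifndef __NR_fchmodat2
        #define __NR_fchmodat2 452      /* number wired up above */
        #endif

        int main(void)
        {
                /* chmod without following a trailing symlink */
                return syscall(__NR_fchmodat2, AT_FDCWD, "/tmp/example",
                               0644, AT_SYMLINK_NOFOLLOW) == 0 ? 0 : 1;
        }
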
index 7856c3a..29db061 100644 (file)
@@ -197,6 +197,8 @@ config ARM64
                    !CC_OPTIMIZE_FOR_SIZE)
        select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
                if DYNAMIC_FTRACE_WITH_ARGS
+       select HAVE_SAMPLE_FTRACE_DIRECT
+       select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select HAVE_FAST_GUP
        select HAVE_FTRACE_MCOUNT_RECORD
@@ -1791,9 +1793,6 @@ config ARM64_PAN
          The feature is detected at runtime, and will remain as a 'nop'
          instruction if the cpu does not implement the feature.
 
-config AS_HAS_LDAPR
-       def_bool $(as-instr,.arch_extension rcpc)
-
 config AS_HAS_LSE_ATOMICS
        def_bool $(as-instr,.arch_extension lse)
 
@@ -1931,6 +1930,9 @@ config AS_HAS_ARMV8_3
 config AS_HAS_CFI_NEGATE_RA_STATE
        def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n)
 
+config AS_HAS_LDAPR
+       def_bool $(as-instr,.arch_extension rcpc)
+
 endmenu # "ARMv8.3 architectural features"
 
 menu "ARMv8.4 architectural features"
index 38ae674..3037f58 100644 (file)
        status = "okay";
        clock-frequency = <100000>;
        i2c-sda-falling-time-ns = <890>;  /* hcnt */
-       i2c-sdl-falling-time-ns = <890>;  /* lcnt */
+       i2c-scl-falling-time-ns = <890>;  /* lcnt */
 
        pinctrl-names = "default", "gpio";
        pinctrl-0 = <&i2c1_pmx_func>;
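
The misspelled i2c-sdl-falling-time-ns matched no binding, so the I2C host driver silently fell back to its default SCL falling time; with the rename the 890 ns value actually takes effect. The same typo is fixed again in the next file.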
index ede99dc..f4cf30b 100644 (file)
        status = "okay";
        clock-frequency = <100000>;
        i2c-sda-falling-time-ns = <890>;  /* hcnt */
-       i2c-sdl-falling-time-ns = <890>;  /* lcnt */
+       i2c-scl-falling-time-ns = <890>;  /* lcnt */
 
        adc@14 {
                compatible = "lltc,ltc2497";
diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi
deleted file mode 120000 (symlink)
index 68fd0f8..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi
\ No newline at end of file
index 03e7679..479948f 100644 (file)
 };
 
 &gpio1 {
-       gpio-line-names = "nINT_ETHPHY", "LED_RED", "WDOG_INT", "X_RTC_INT",
+       gpio-line-names = "", "LED_RED", "WDOG_INT", "X_RTC_INT",
                "", "", "", "RESET_ETHPHY",
                "CAN_nINT", "CAN_EN", "nENABLE_FLATLINK", "",
                "USB_OTG_VBUS_EN", "", "LED_GREEN", "LED_BLUE";
index 92616bc..847f085 100644 (file)
 };
 
 &gpio1 {
-       gpio-line-names = "nINT_ETHPHY", "", "WDOG_INT", "X_RTC_INT",
+       gpio-line-names = "", "", "WDOG_INT", "X_RTC_INT",
                "", "", "", "RESET_ETHPHY",
                "", "", "nENABLE_FLATLINK";
 };
                                };
                        };
 
-                       reg_vdd_gpu: buck3 {
+                       reg_vdd_vpu: buck3 {
                                regulator-always-on;
                                regulator-boot-on;
                                regulator-max-microvolt = <1000000>;
index 6f26914..07b07dc 100644 (file)
        status = "okay";
 };
 
+&disp_blk_ctrl {
+       status = "disabled";
+};
+
 &pgc_mipi {
        status = "disabled";
 };
index 93088fa..d5b7168 100644 (file)
        status = "okay";
 };
 
+&disp_blk_ctrl {
+       status = "disabled";
+};
+
 &pgc_mipi {
        status = "disabled";
 };
index d6b36f0..1a647d4 100644 (file)
                                compatible = "fsl,imx8mm-mipi-csi2";
                                reg = <0x32e30000 0x1000>;
                                interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
-                               assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>,
-                                                 <&clk IMX8MM_CLK_CSI1_PHY_REF>;
-                               assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>,
-                                                         <&clk IMX8MM_SYS_PLL2_1000M>;
+                               assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>;
+                               assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>;
+
                                clock-frequency = <333000000>;
                                clocks = <&clk IMX8MM_CLK_DISP_APB_ROOT>,
                                         <&clk IMX8MM_CLK_CSI1_ROOT>,
index d3a6710..b8946ed 100644 (file)
                        MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC           0x91
                        MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL     0x91
                        MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL     0x1f
-                       MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9               0x19
+                       MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9               0x159
                >;
        };
 
index 9869fe7..aa38dd6 100644 (file)
                                compatible = "fsl,imx8mm-mipi-csi2";
                                reg = <0x32e30000 0x1000>;
                                interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
-                               assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>,
-                                                 <&clk IMX8MN_CLK_CSI1_PHY_REF>;
-                               assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>,
-                                                         <&clk IMX8MN_SYS_PLL2_1000M>;
+                               assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>;
+                               assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>;
                                assigned-clock-rates = <333000000>;
                                clock-frequency = <333000000>;
                                clocks = <&clk IMX8MN_CLK_DISP_APB_ROOT>,
index 1a2d2c0..01eec42 100644 (file)
                                                                         <&clk IMX8MQ_SYS1_PLL_800M>,
                                                                         <&clk IMX8MQ_VPU_PLL>;
                                                assigned-clock-rates = <600000000>,
-                                                                      <600000000>,
+                                                                      <300000000>,
                                                                       <800000000>,
                                                                       <0>;
                                        };
index 8643612..1d8dd14 100644 (file)
 
                        anatop: anatop@44480000 {
                                compatible = "fsl,imx93-anatop", "syscon";
-                               reg = <0x44480000 0x10000>;
+                               reg = <0x44480000 0x2000>;
                        };
 
                        adc1: adc@44530000 {
index 9022ad7..a9e7b83 100644 (file)
                        };
                };
 
-               pm8150l-thermal {
+               pm8150l-pcb-thermal {
                        polling-delay-passive = <0>;
                        polling-delay = <0>;
                        thermal-sensors = <&pm8150l_adc_tm 1>;
index ab767cf..26f5a4e 100644 (file)
 
                vreg_l4c: ldo4 {
                        regulator-name = "vreg_l4c";
-                       regulator-min-microvolt = <1100000>;
-                       regulator-max-microvolt = <1300000>;
+                       regulator-min-microvolt = <1200000>;
+                       regulator-max-microvolt = <1200000>;
                        regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
                        /*
                         * FIXME: This should have regulator-allow-set-load but
index e25dc2b..06df931 100644 (file)
                                reg = <0 0x0ae94400 0 0x200>,
                                      <0 0x0ae94600 0 0x280>,
                                      <0 0x0ae94a00 0 0x1e0>;
-                               reg-names = "dsi0_phy",
-                                           "dsi0_phy_lane",
+                               reg-names = "dsi_phy",
+                                           "dsi_phy_lane",
                                            "dsi_pll";
 
                                #clock-cells = <1>;
index d3ae185..be78a93 100644 (file)
                };
 
                osm_l3: interconnect@18321000 {
-                       compatible = "qcom,sc8180x-osm-l3";
+                       compatible = "qcom,sc8180x-osm-l3", "qcom,osm-l3";
                        reg = <0 0x18321000 0 0x1400>;
 
                        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
index 18c822a..b46e55b 100644 (file)
@@ -56,7 +56,7 @@
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD0>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
@@ -85,7 +85,7 @@
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD1>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD2>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD3>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD4>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD5>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD6>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        qcom,freq-domain = <&cpufreq_hw 2>;
                        operating-points-v2 = <&cpu7_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+                                       <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
                        power-domains = <&CPU_PD7>;
                        power-domain-names = "psci";
                        #cooling-cells = <2>;
                        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
                        clock-names = "xo", "alternate";
 
-                       #interconnect-cells = <2>;
+                       #interconnect-cells = <1>;
                };
 
                cpufreq_hw: cpufreq@18323000 {
index 83ab6de..1efa07f 100644 (file)
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_0: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_100: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_200: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 0>;
                        operating-points-v2 = <&cpu0_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_300: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_400: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_500: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 1>;
                        operating-points-v2 = <&cpu4_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_600: l2-cache {
                                compatible = "cache";
                        qcom,freq-domain = <&cpufreq_hw 2>;
                        operating-points-v2 = <&cpu7_opp_table>;
                        interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
-                                       <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+                                       <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
                        #cooling-cells = <2>;
                        L2_700: l2-cache {
                                compatible = "cache";
                        clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
                        clock-names = "xo", "alternate";
 
-                       #interconnect-cells = <2>;
+                       #interconnect-cells = <1>;
                };
 
                cpufreq_hw: cpufreq@18591000 {
index 88ef478..ec451c6 100644 (file)
                        qcom,controlled-remotely;
                        iommus = <&apps_smmu 0x594 0x0011>,
                                 <&apps_smmu 0x596 0x0011>;
+                       /* FIXME: Probing BAM DMA causes an abort and a system hang */
+                       status = "fail";
                };
 
                crypto: crypto@1dfa000 {
                                 <&apps_smmu 0x596 0x0011>;
                        interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>;
                        interconnect-names = "memory";
+                       /* FIXME: depends on BAM DMA, which is disabled above */
+                       status = "disabled";
                };
 
                ipa: ipa@1e40000 {
index 232910e..66f68fc 100644 (file)
                                     <GIC_SPI 212 IRQ_TYPE_EDGE_RISING>,
                                     <GIC_SPI 213 IRQ_TYPE_EDGE_RISING>;
                        interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0",
-                                         "tgiv0", "tgie0", "tgif0",
-                                         "tgia1", "tgib1", "tgiv1", "tgiu1",
-                                         "tgia2", "tgib2", "tgiv2", "tgiu2",
+                                         "tciv0", "tgie0", "tgif0",
+                                         "tgia1", "tgib1", "tciv1", "tciu1",
+                                         "tgia2", "tgib2", "tciv2", "tciu2",
                                          "tgia3", "tgib3", "tgic3", "tgid3",
-                                         "tgiv3",
+                                         "tciv3",
                                          "tgia4", "tgib4", "tgic4", "tgid4",
-                                         "tgiv4",
+                                         "tciv4",
                                          "tgiu5", "tgiv5", "tgiw5",
                                          "tgia6", "tgib6", "tgic6", "tgid6",
-                                         "tgiv6",
+                                         "tciv6",
                                          "tgia7", "tgib7", "tgic7", "tgid7",
-                                         "tgiv7",
+                                         "tciv7",
                                          "tgia8", "tgib8", "tgic8", "tgid8",
-                                         "tgiv8", "tgiu8";
+                                         "tciv8", "tciu8";
                        clocks = <&cpg CPG_MOD R9A07G044_MTU_X_MCK_MTU3>;
                        power-domains = <&cpg>;
                        resets = <&cpg R9A07G044_MTU_X_PRESET_MTU3>;
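
Per the MTU3 hardware manuals, TGIx are the general (compare/capture) interrupts while TCIV/TCIU are the counter overflow/underflow interrupts, so the tgiv*/tgiu* entries are renamed to tciv*/tciu*; the identical rename follows for the R9A07G054 variant below.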
index 2eba3a8..1f1d481 100644 (file)
                                     <GIC_SPI 212 IRQ_TYPE_EDGE_RISING>,
                                     <GIC_SPI 213 IRQ_TYPE_EDGE_RISING>;
                        interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0",
-                                         "tgiv0", "tgie0", "tgif0",
-                                         "tgia1", "tgib1", "tgiv1", "tgiu1",
-                                         "tgia2", "tgib2", "tgiv2", "tgiu2",
+                                         "tciv0", "tgie0", "tgif0",
+                                         "tgia1", "tgib1", "tciv1", "tciu1",
+                                         "tgia2", "tgib2", "tciv2", "tciu2",
                                          "tgia3", "tgib3", "tgic3", "tgid3",
-                                         "tgiv3",
+                                         "tciv3",
                                          "tgia4", "tgib4", "tgic4", "tgid4",
-                                         "tgiv4",
+                                         "tciv4",
                                          "tgiu5", "tgiv5", "tgiw5",
                                          "tgia6", "tgib6", "tgic6", "tgid6",
-                                         "tgiv6",
+                                         "tciv6",
                                          "tgia7", "tgib7", "tgic7", "tgid7",
-                                         "tgiv7",
+                                         "tciv7",
                                          "tgia8", "tgib8", "tgic8", "tgid8",
-                                         "tgiv8", "tgiu8";
+                                         "tciv8", "tciu8";
                        clocks = <&cpg CPG_MOD R9A07G054_MTU_X_MCK_MTU3>;
                        power-domains = <&cpg>;
                        resets = <&cpg R9A07G054_MTU_X_PRESET_MTU3>;
index 8332c8a..42ce78b 100644 (file)
                        };
                        power-domain@PX30_PD_MMC_NAND {
                                reg = <PX30_PD_MMC_NAND>;
-                               clocks =  <&cru HCLK_NANDC>,
-                                         <&cru HCLK_EMMC>,
-                                         <&cru HCLK_SDIO>,
-                                         <&cru HCLK_SFC>,
-                                         <&cru SCLK_EMMC>,
-                                         <&cru SCLK_NANDC>,
-                                         <&cru SCLK_SDIO>,
-                                         <&cru SCLK_SFC>;
+                               clocks = <&cru HCLK_NANDC>,
+                                        <&cru HCLK_EMMC>,
+                                        <&cru HCLK_SDIO>,
+                                        <&cru HCLK_SFC>,
+                                        <&cru SCLK_EMMC>,
+                                        <&cru SCLK_NANDC>,
+                                        <&cru SCLK_SDIO>,
+                                        <&cru SCLK_SFC>;
                                pm_qos = <&qos_emmc>, <&qos_nand>,
                                         <&qos_sdio>, <&qos_sfc>;
                                #power-domain-cells = <0>;
index 7ea4816..9232357 100644 (file)
                regulator-name = "vdd_core";
                regulator-min-microvolt = <827000>;
                regulator-max-microvolt = <1340000>;
-               regulator-init-microvolt = <1015000>;
                regulator-settling-time-up-us = <250>;
                regulator-always-on;
                regulator-boot-on;
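
regulator-init-microvolt is a downstream property that the upstream regulator bindings do not accept, so dtbs_check warns on it; the long series of Rockchip board changes below drops it the same way, leaving the initial voltage to the min/max constraints and the bootloader.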
index a71f249..e9810d2 100644 (file)
                regulator-name = "vdd_core";
                regulator-min-microvolt = <827000>;
                regulator-max-microvolt = <1340000>;
-               regulator-init-microvolt = <1015000>;
                regulator-settling-time-up-us = <250>;
                regulator-always-on;
                regulator-boot-on;
index d1f3433..6464ef4 100644 (file)
                compatible = "brcm,bcm4329-fmac";
                reg = <1>;
                interrupt-parent = <&gpio0>;
-               interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>;
+               interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "host-wake";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_host_wake_l>;
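
An interrupts specifier for a GPIO-backed interrupt takes IRQ trigger flags, not GPIO polarity flags, and the macro values make the mix-up silent (values from include/dt-bindings/gpio/gpio.h and include/dt-bindings/interrupt-controller/irq.h):

        #define GPIO_ACTIVE_HIGH        0       /* reads back as IRQ_TYPE_NONE here */
        #define IRQ_TYPE_NONE           0
        #define IRQ_TYPE_LEVEL_HIGH     4

So the old flag quietly meant "no trigger type configured"; the same substitution recurs in two more board files below.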
index b6e082f..7c5f441 100644 (file)
                        vcc_sdio: LDO_REG4 {
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <3000000>;
                                regulator-min-microvolt = <1800000>;
                                regulator-max-microvolt = <3300000>;
                                regulator-name = "vcc_sdio";
index 028eb50..8bfd5f8 100644 (file)
 &sdhci {
        max-frequency = <150000000>;
        bus-width = <8>;
-       mmc-hs400-1_8v;
+       mmc-hs200-1_8v;
        non-removable;
-       mmc-hs400-enhanced-strobe;
        status = "okay";
 };
 
index 907071d..980c453 100644 (file)
@@ -45,7 +45,7 @@
        sdio_pwrseq: sdio-pwrseq {
                compatible = "mmc-pwrseq-simple";
                clocks = <&rk808 1>;
-               clock-names = "ext_clock";
+               clock-names = "lpo";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_enable_h>;
                reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
 };
 
 &sdhci {
+       max-frequency = <150000000>;
        bus-width = <8>;
-       mmc-hs400-1_8v;
-       mmc-hs400-enhanced-strobe;
+       mmc-hs200-1_8v;
        non-removable;
        status = "okay";
 };
index cec3b7b..8a17c1e 100644 (file)
@@ -31,7 +31,7 @@
                compatible = "brcm,bcm4329-fmac";
                reg = <1>;
                interrupt-parent = <&gpio0>;
-               interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>;
+               interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "host-wake";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_host_wake_l>;
index a2c31d5..8cbf3d9 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_logic";
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_gpu";
                regulator-boot-on;
                regulator-min-microvolt = <712500>;
                regulator-max-microvolt = <1390000>;
-               regulator-init-microvolt = <900000>;
                regulator-name = "vdd_cpu";
                regulator-ramp-delay = <2300>;
                vin-supply = <&vcc_sys>;
index 410cd3e..0c18406 100644 (file)
 
 &gmac1 {
        assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
-       assigned-clock-parents =  <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>;
+       assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>;
        phy-mode = "rgmii";
        clock_in_out = "input";
        pinctrl-names = "default";
                compatible = "brcm,bcm4329-fmac";
                reg = <1>;
                interrupt-parent = <&gpio2>;
-               interrupts = <RK_PB2 GPIO_ACTIVE_HIGH>;
+               interrupts = <RK_PB2 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-names = "host-wake";
                pinctrl-names = "default";
                pinctrl-0 = <&wifi_host_wake_h>;
index ff936b7..1c6d83b 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
index 8d61f82..d899087 100644 (file)
                                regulator-always-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-name = "vdd_gpu_npu";
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
index 25a8c78..854d02b 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_logic";
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_gpu";
index b276eb0..2d92713 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
                                regulator-boot-on;
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
index 5e4236a..1b1c67d 100644 (file)
 
 &mdio1 {
        rgmii_phy1: ethernet-phy@0 {
-               compatible="ethernet-phy-ieee802.3-c22";
-               reg= <0x0>;
+               compatible = "ethernet-phy-ieee802.3-c22";
+               reg = <0x0>;
        };
 };
 
index 42889c5..938092f 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
                                regulator-name = "vdd_gpu";
                                regulator-min-microvolt = <900000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
 
                                regulator-state-mem {
index 31aa2b8..63bae36 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                regulator-state-mem {
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
                                        regulator-state-mem {
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-name = "vdd_npu";
                                regulator-state-mem {
index ff0bf24..f9127dd 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 6747925..19f8fc3 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 25e2056..89e84e3 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vcca1v8_image: LDO_REG9 {
                                regulator-name = "vcca1v8_image";
-                               regulator-init-microvolt = <950000>;
                                regulator-min-microvolt = <950000>;
                                regulator-max-microvolt = <1800000>;
 
index e653b06..a8a4cc1 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
                                regulator-boot-on;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
-                               regulator-init-microvolt = <900000>;
                                regulator-ramp-delay = <6001>;
                                regulator-initial-mode = <0x2>;
 
index 58ba328..93189f8 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 59ecf86..a337f54 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index c50fbdd..45b03dc 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index 917f5b2..e05ab11 100644 (file)
                                regulator-name = "vdd_logic";
                                regulator-always-on;
                                regulator-boot-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
                        vdd_gpu: DCDC_REG2 {
                                regulator-name = "vdd_gpu";
                                regulator-always-on;
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
 
                        vdd_npu: DCDC_REG4 {
                                regulator-name = "vdd_npu";
-                               regulator-init-microvolt = <900000>;
                                regulator-initial-mode = <0x2>;
                                regulator-min-microvolt = <500000>;
                                regulator-max-microvolt = <1350000>;
index afda976..5153703 100644 (file)
                                regulator-boot-on;
                                regulator-min-microvolt = <550000>;
                                regulator-max-microvolt = <950000>;
-                               regulator-init-microvolt = <750000>;
                                regulator-ramp-delay = <12500>;
                                regulator-name = "vdd_vdenc_s0";
 
index 4d9ed2a..1a60a27 100644 (file)
        cpu-supply = <&vdd_cpu_lit_s0>;
 };
 
-&cpu_b0{
+&cpu_b0 {
        cpu-supply = <&vdd_cpu_big0_s0>;
 };
 
-&cpu_b1{
+&cpu_b1 {
        cpu-supply = <&vdd_cpu_big0_s0>;
 };
 
-&cpu_b2{
+&cpu_b2 {
        cpu-supply = <&vdd_cpu_big1_s0>;
 };
 
-&cpu_b3{
+&cpu_b3 {
        cpu-supply = <&vdd_cpu_big1_s0>;
 };
 
index 0777bca..a25d783 100644 (file)
@@ -1469,7 +1469,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_CUSE=m
 CONFIG_OVERLAY_FS=m
index bd68e1b..4d537d5 100644 (file)
@@ -42,6 +42,9 @@
 #define ACPI_MADT_GICC_SPE  (offsetof(struct acpi_madt_generic_interrupt, \
        spe_interrupt) + sizeof(u16))
 
+#define ACPI_MADT_GICC_TRBE  (offsetof(struct acpi_madt_generic_interrupt, \
+       trbe_interrupt) + sizeof(u16))
+
 /* Basic configuration for ACPI */
 #ifdef CONFIG_ACPI
 pgprot_t __acpi_get_mem_attribute(phys_addr_t addr);
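
The macro follows the usual MADT pattern: a field at the tail of a GICC
entry may only be consumed if the firmware-reported entry length covers
it. A minimal sketch of the resulting length check (the helper name is
an assumption, not from this series):

    /* Only trust trbe_interrupt if the GICC entry is long enough
     * to actually contain that field. */
    static bool gicc_has_trbe(struct acpi_madt_generic_interrupt *gicc)
    {
            return gicc->header.length >= ACPI_MADT_GICC_TRBE;
    }
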
index 4cf2cb0..f482b99 100644 (file)
@@ -30,28 +30,16 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md,
                                bool has_bti);
 
-#define arch_efi_call_virt_setup()                                     \
-({                                                                     \
-       efi_virtmap_load();                                             \
-       __efi_fpsimd_begin();                                           \
-       raw_spin_lock(&efi_rt_lock);                                    \
-})
-
 #undef arch_efi_call_virt
 #define arch_efi_call_virt(p, f, args...)                              \
        __efi_rt_asm_wrapper((p)->f, #f, args)
 
-#define arch_efi_call_virt_teardown()                                  \
-({                                                                     \
-       raw_spin_unlock(&efi_rt_lock);                                  \
-       __efi_fpsimd_end();                                             \
-       efi_virtmap_unload();                                           \
-})
-
-extern raw_spinlock_t efi_rt_lock;
 extern u64 *efi_rt_stack_top;
 efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
 
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
+
 /*
  * efi_rt_stack_top[-1] contains the value the stack pointer had before
  * switching to the EFI runtime stack.
index 8e5ffb5..b7afaa0 100644 (file)
 .Lskip_hcrx_\@:
 .endm
 
+/* Check whether we are running in the host at EL2, i.e., (h)VHE. Jump to \fail if not. */
+.macro __check_hvhe fail, tmp
+       mrs     \tmp, hcr_el2
+       and     \tmp, \tmp, #HCR_E2H
+       cbz     \tmp, \fail
+.endm
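
For reference, the macro is just the asm form of an HCR_EL2.E2H test at
EL2; a C-level sketch under the same assumption (the helper name is a
stand-in):

    /* True when the host runs at EL2 with HCR_EL2.E2H set, i.e.
     * VHE or the protected-KVM "hVHE" mode. */
    static inline bool running_hvhe(void)
    {
            return read_sysreg(hcr_el2) & HCR_E2H;
    }
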
+
 /*
  * Allow Non-secure EL1 and EL0 to access physical timer and counter.
  * This is not necessary for VHE, since the host kernel runs in EL2,
@@ -43,9 +50,7 @@
  */
 .macro __init_el2_timers
        mov     x0, #3                          // Enable EL1 physical timers
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .LnVHE_\@
+       __check_hvhe .LnVHE_\@, x1
        lsl     x0, x0, #10
 .LnVHE_\@:
        msr     cnthctl_el2, x0
 
 /* Coprocessor traps */
 .macro __init_el2_cptr
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .LnVHE_\@
+       __check_hvhe .LnVHE_\@, x1
        mov     x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
-       b       .Lset_cptr_\@
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_\@
 .LnVHE_\@:
        mov     x0, #0x33ff
-.Lset_cptr_\@:
        msr     cptr_el2, x0                    // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
 .endm
 
 /* Disable any fine grained traps */
        check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
 
 .Linit_sve_\@: /* SVE register access */
-       mrs     x0, cptr_el2                    // Disable SVE traps
-       mrs     x1, hcr_el2
-       and     x1, x1, #HCR_E2H
-       cbz     x1, .Lcptr_nvhe_\@
+       __check_hvhe .Lcptr_nvhe_\@, x1
 
-       // VHE case
+       // (h)VHE case
+       mrs     x0, cpacr_el1                   // Disable SVE traps
        orr     x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
-       b       .Lset_cptr_\@
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_\@
 
 .Lcptr_nvhe_\@: // nVHE case
+       mrs     x0, cptr_el2                    // Disable SVE traps
        bic     x0, x0, #CPTR_EL2_TZ
-.Lset_cptr_\@:
        msr     cptr_el2, x0
+.Lskip_set_cptr_\@:
        isb
        mov     x1, #ZCR_ELx_LEN_MASK           // SVE: Enable full vector
        msr_s   SYS_ZCR_EL2, x1                 // length for EL1.
        check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
 
 .Linit_sme_\@: /* SME register access and priority mapping */
+       __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+       // (h)VHE case
+       mrs     x0, cpacr_el1                   // Disable SME traps
+       orr     x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+       msr     cpacr_el1, x0
+       b       .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
        mrs     x0, cptr_el2                    // Disable SME traps
        bic     x0, x0, #CPTR_EL2_TSM
        msr     cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
        isb
 
        mrs     x1, sctlr_el2
index 67f2fb7..8df46f1 100644 (file)
@@ -356,7 +356,7 @@ static inline int sme_max_virtualisable_vl(void)
        return vec_max_virtualisable_vl(ARM64_VEC_SME);
 }
 
-extern void sme_alloc(struct task_struct *task);
+extern void sme_alloc(struct task_struct *task, bool flush);
 extern unsigned int sme_get_vl(void);
 extern int sme_set_current_vl(unsigned long arg);
 extern int sme_get_current_vl(void);
@@ -388,7 +388,7 @@ static inline void sme_smstart_sm(void) { }
 static inline void sme_smstop_sm(void) { }
 static inline void sme_smstop(void) { }
 
-static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_alloc(struct task_struct *task, bool flush) { }
 static inline void sme_setup(void) { }
 static inline unsigned int sme_get_vl(void) { return 0; }
 static inline int sme_max_vl(void) { return 0; }
index 21ac1c5..ab15819 100644 (file)
@@ -211,6 +211,10 @@ static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs
 {
        return ret_regs->fp;
 }
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
+                          unsigned long frame_pointer);
+
 #endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER  */
 #endif
 
index 692b1ec..5212674 100644 (file)
 #define KERNEL_HWCAP_SME_B16B16                __khwcap2_feature(SME_B16B16)
 #define KERNEL_HWCAP_SME_F16F16                __khwcap2_feature(SME_F16F16)
 #define KERNEL_HWCAP_MOPS              __khwcap2_feature(MOPS)
+#define KERNEL_HWCAP_HBC               __khwcap2_feature(HBC)
 
 /*
  * This yields a mask that user programs can use to figure out what
index 5777738..85d2614 100644 (file)
 #define SWAPPER_RX_MMUFLAGS    (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
 #endif
 
-/*
- * To make optimal use of block mappings when laying out the linear
- * mapping, round down the base of physical memory to a size that can
- * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
- * (64k granule), or a multiple that can be mapped using contiguous bits
- * in the page tables: 32 * PMD_SIZE (16k granule)
- */
-#if defined(CONFIG_ARM64_4K_PAGES)
-#define ARM64_MEMSTART_SHIFT           PUD_SHIFT
-#elif defined(CONFIG_ARM64_16K_PAGES)
-#define ARM64_MEMSTART_SHIFT           CONT_PMD_SHIFT
-#else
-#define ARM64_MEMSTART_SHIFT           PMD_SHIFT
-#endif
-
-/*
- * sparsemem vmemmap imposes an additional requirement on the alignment of
- * memstart_addr, due to the fact that the base of the vmemmap region
- * has a direct correspondence, and needs to appear sufficiently aligned
- * in the virtual address space.
- */
-#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
-#define ARM64_MEMSTART_ALIGN   (1UL << SECTION_SIZE_BITS)
-#else
-#define ARM64_MEMSTART_ALIGN   (1UL << ARM64_MEMSTART_SHIFT)
-#endif
-
 #endif /* __ASM_KERNEL_PGTABLE_H */
index 7d170aa..24e28bb 100644 (file)
@@ -278,7 +278,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
 asmlinkage void kvm_unexpected_el2_exception(void);
 struct kvm_cpu_context;
 void handle_trap(struct kvm_cpu_context *host_ctxt);
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
 void __noreturn __pkvm_init_finalise(void);
 void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
 void kvm_patch_vector_branch(struct alt_instr *alt,
index efc0b45..3d6725f 100644 (file)
@@ -571,6 +571,14 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
        return test_bit(feature, vcpu->arch.features);
 }
 
+static __always_inline void kvm_write_cptr_el2(u64 val)
+{
+       if (has_vhe() || has_hvhe())
+               write_sysreg(val, cpacr_el1);
+       else
+               write_sysreg(val, cptr_el2);
+}
+
 static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
        u64 val;
@@ -578,8 +586,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
        if (has_vhe()) {
                val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
                       CPACR_EL1_ZEN_EL1EN);
+               if (cpus_have_final_cap(ARM64_SME))
+                       val |= CPACR_EL1_SMEN_EL1EN;
        } else if (has_hvhe()) {
                val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+
+               if (!vcpu_has_sve(vcpu) ||
+                   (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+                       val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+               if (cpus_have_final_cap(ARM64_SME))
+                       val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
        } else {
                val = CPTR_NVHE_EL2_RES1;
 
@@ -597,9 +613,6 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
 {
        u64 val = kvm_get_reset_cptr_el2(vcpu);
 
-       if (has_vhe() || has_hvhe())
-               write_sysreg(val, cpacr_el1);
-       else
-               write_sysreg(val, cptr_el2);
+       kvm_write_cptr_el2(val);
 }
 #endif /* __ARM64_KVM_EMULATE_H__ */
index 8b60967..d3dd05b 100644 (file)
@@ -727,6 +727,8 @@ struct kvm_vcpu_arch {
 #define DBG_SS_ACTIVE_PENDING  __vcpu_single_flag(sflags, BIT(5))
 /* PMUSERENR for the guest EL0 is on physical CPU */
 #define PMUSERENR_ON_CPU       __vcpu_single_flag(sflags, BIT(6))
+/* WFI instruction trapped */
+#define IN_WFI                 __vcpu_single_flag(sflags, BIT(7))
 
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
index 8294a9a..929d355 100644 (file)
@@ -608,22 +608,26 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
 
 /**
- * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
+ * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
+ *                                        flag in a page-table entry.
  * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init*().
  * @addr:      Intermediate physical address to identify the page-table entry.
+ * @size:      Size of the address range to visit.
+ * @mkold:     True if the access flag should be cleared.
  *
  * The offset of @addr within a page is ignored.
  *
- * If there is a valid, leaf page-table entry used to translate @addr, then
- * clear the access flag in that entry.
+ * Tests and conditionally clears the access flag for every valid, leaf
+ * page-table entry used to translate the range [@addr, @addr + @size).
  *
  * Note that it is the caller's responsibility to invalidate the TLB after
  * calling this function to ensure that the updated permissions are visible
  * to the CPUs.
  *
- * Return: The old page-table entry prior to clearing the flag, 0 on failure.
+ * Return: True if any of the visited PTEs had the access flag set.
  */
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+                                        u64 size, bool mkold);
 
 /**
  * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
@@ -646,18 +650,6 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                                   enum kvm_pgtable_prot prot);
 
 /**
- * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
- *                                access flag set.
- * @pgt:       Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr:      Intermediate physical address to identify the page-table entry.
- *
- * The offset of @addr within a page is ignored.
- *
- * Return: True if the page-table entry has the access flag set, false otherwise.
- */
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
-
-/**
  * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
  *                                   of Coherency for guest stage-2 address
  *                                   range.
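
As the updated kernel-doc describes, the old single-PTE mkold/is_young
pair collapses into one range walk. A hedged sketch of a caller (the
wrapper name is hypothetical; the pgt handle comes from
kvm_pgtable_stage2_init*()):

    /* Age a range of guest memory in a single walk: mkold=true both
     * tests and clears the access flag, and the TLB invalidation
     * remains the caller's responsibility. */
    static bool age_ipa_range(struct kvm_pgtable *pgt, u64 ipa, u64 size)
    {
            return kvm_pgtable_stage2_test_clear_young(pgt, ipa, size, true);
    }
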
index 4384eaa..94b6885 100644 (file)
@@ -64,7 +64,6 @@ extern void arm64_memblock_init(void);
 extern void paging_init(void);
 extern void bootmem_init(void);
 extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt);
-extern void init_mem_pgprot(void);
 extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt,
                                   phys_addr_t size, pgprot_t prot);
 extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys,
index 0bd18de..72c2e84 100644 (file)
@@ -103,6 +103,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
 #define pte_young(pte)         (!!(pte_val(pte) & PTE_AF))
 #define pte_special(pte)       (!!(pte_val(pte) & PTE_SPECIAL))
 #define pte_write(pte)         (!!(pte_val(pte) & PTE_WRITE))
+#define pte_rdonly(pte)                (!!(pte_val(pte) & PTE_RDONLY))
 #define pte_user(pte)          (!!(pte_val(pte) & PTE_USER))
 #define pte_user_exec(pte)     (!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)          (!!(pte_val(pte) & PTE_CONT))
@@ -120,7 +121,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys)
        (__boundary - 1 < (end) - 1) ? __boundary : (end);                      \
 })
 
-#define pte_hw_dirty(pte)      (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY))
+#define pte_hw_dirty(pte)      (pte_write(pte) && !pte_rdonly(pte))
 #define pte_sw_dirty(pte)      (!!(pte_val(pte) & PTE_DIRTY))
 #define pte_dirty(pte)         (pte_sw_dirty(pte) || pte_hw_dirty(pte))
 
@@ -212,7 +213,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
         * clear), set the PTE_DIRTY bit.
         */
        if (pte_hw_dirty(pte))
-               pte = pte_mkdirty(pte);
+               pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
 
        pte = clear_pte_bit(pte, __pgprot(PTE_WRITE));
        pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
@@ -823,7 +824,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
                              PTE_ATTRINDX_MASK;
        /* preserve the hardware dirty information */
        if (pte_hw_dirty(pte))
-               pte = pte_mkdirty(pte);
+               pte = set_pte_bit(pte, __pgprot(PTE_DIRTY));
+
        pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask);
        return pte;
 }
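
Both hunks above record dirtiness by setting the software PTE_DIRTY bit
directly instead of calling pte_mkdirty(), which also manipulates the
hardware read-only encoding. A self-contained model of the bookkeeping,
with simplified stand-in masks rather than the real arm64 encodings:

    #include <stdio.h>

    #define PTE_WRITE  (1u << 0)
    #define PTE_RDONLY (1u << 1)
    #define PTE_DIRTY  (1u << 2)    /* software dirty bit */

    /* Hardware-dirty: writable and not marked read-only. */
    static int hw_dirty(unsigned int pte)
    {
            return (pte & PTE_WRITE) && !(pte & PTE_RDONLY);
    }

    static unsigned int wrprotect(unsigned int pte)
    {
            if (hw_dirty(pte))
                    pte |= PTE_DIRTY;   /* carry HW dirty into the SW bit */
            pte &= ~PTE_WRITE;
            pte |= PTE_RDONLY;
            return pte;
    }

    int main(void)
    {
            unsigned int pte = PTE_WRITE;   /* hardware-dirty entry */
            /* The dirty information survives write-protection: prints 1. */
            printf("%d\n", !!(wrprotect(pte) & PTE_DIRTY));
            return 0;
    }
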
index 3918f2a..e5bc545 100644 (file)
@@ -359,14 +359,6 @@ static inline void prefetchw(const void *ptr)
        asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
 }
 
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *ptr)
-{
-       asm volatile(ARM64_LSE_ATOMIC_INSN(
-                    "prfm pstl1strm, %a0",
-                    "nop") : : "p" (ptr));
-}
-
 extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
 extern void __init minsigstksz_setup(void);
 
index 4292d9b..484cb69 100644 (file)
@@ -17,6 +17,9 @@
 
 #include <asm/virt.h>
 
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
 extern unsigned long sdei_exit_mode;
 
 /* Software Delegated Exception entry point from firmware */
@@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num,
                                                   unsigned long pc,
                                                   unsigned long pstate);
 
+/* Abort a running handler. Context is discarded. */
+void __sdei_handler_abort(void);
+
 /*
  * The above entry point does the minimum to call C code. This function does
  * anything else, before calling the driver.
index 4cfe9b4..ab8e14b 100644 (file)
@@ -85,4 +85,7 @@ static inline int syscall_get_arch(struct task_struct *task)
        return AUDIT_ARCH_AARCH64;
 }
 
+int syscall_trace_enter(struct pt_regs *regs);
+void syscall_trace_exit(struct pt_regs *regs);
+
 #endif /* __ASM_SYSCALL_H */
index b481935..16464bf 100644 (file)
 /*
  * For registers without architectural names, or simply unsupported by
  * GAS.
+ *
+ * __check_r forces the compiler to generate warnings when evaluating
+ * r, which would not normally happen because r is passed to the
+ * assembler via __stringify(r).
  */
 #define read_sysreg_s(r) ({                                            \
        u64 __val;                                                      \
+       u32 __maybe_unused __check_r = (u32)(r);                        \
        asm volatile(__mrs_s("%0", r) : "=r" (__val));                  \
        __val;                                                          \
 })
 
 #define write_sysreg_s(v, r) do {                                      \
        u64 __val = (u64)(v);                                           \
+       u32 __maybe_unused __check_r = (u32)(r);                        \
        asm volatile(__msr_s(r, "%x0") : : "rZ" (__val));               \
 } while (0)
 
index 64a514f..bd77253 100644 (file)
@@ -39,7 +39,7 @@
 #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls           452
+#define __NR_compat_syscalls           453
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
index d952a28..78b6831 100644 (file)
@@ -909,6 +909,8 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
 
 /*
  * Please add new compat syscalls above this comment and update
index 5227db7..261d6e9 100644 (file)
@@ -78,6 +78,7 @@ extern u32 __boot_cpu_mode[2];
 
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 void __hyp_reset_vectors(void);
+bool is_kvm_arm_initialised(void);
 
 DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 
diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..485d60b
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
index a2cac43..53026f4 100644 (file)
 #define HWCAP2_SME_B16B16      (1UL << 41)
 #define HWCAP2_SME_F16F16      (1UL << 42)
 #define HWCAP2_MOPS            (1UL << 43)
+#define HWCAP2_HBC             (1UL << 44)
 
 #endif /* _UAPI__ASM_HWCAP_H */
index f9d456f..a5f533f 100644 (file)
@@ -222,7 +222,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
 static const struct arm64_ftr_bits ftr_id_aa64isar2[] = {
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
+       ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH),
                       FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0),
@@ -2708,12 +2708,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                .desc = "Enhanced Virtualization Traps",
                .capability = ARM64_HAS_EVT,
                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
-               .sys_reg = SYS_ID_AA64MMFR2_EL1,
-               .sign = FTR_UNSIGNED,
-               .field_pos = ID_AA64MMFR2_EL1_EVT_SHIFT,
-               .field_width = 4,
-               .min_field_value = ID_AA64MMFR2_EL1_EVT_IMP,
                .matches = has_cpuid_feature,
+               ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP)
        },
        {},
 };
@@ -2844,6 +2840,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
        HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES),
        HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT),
        HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS),
+       HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC),
 #ifdef CONFIG_ARM64_SME
        HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME),
        HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64),
index d1f6859..f372295 100644 (file)
@@ -9,8 +9,6 @@
 #include <linux/acpi.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/psci.h>
 
 #ifdef CONFIG_ACPI_PROCESSOR_IDLE
index 58622dc..98fda85 100644 (file)
@@ -126,6 +126,7 @@ static const char *const hwcap_str[] = {
        [KERNEL_HWCAP_SME_B16B16]       = "smeb16b16",
        [KERNEL_HWCAP_SME_F16F16]       = "smef16f16",
        [KERNEL_HWCAP_MOPS]             = "mops",
+       [KERNEL_HWCAP_HBC]              = "hbc",
 };
 
 #ifdef CONFIG_COMPAT
index baab8dd..49efbdb 100644 (file)
@@ -158,7 +158,21 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
        return s;
 }
 
-DEFINE_RAW_SPINLOCK(efi_rt_lock);
+static DEFINE_RAW_SPINLOCK(efi_rt_lock);
+
+void arch_efi_call_virt_setup(void)
+{
+       efi_virtmap_load();
+       __efi_fpsimd_begin();
+       raw_spin_lock(&efi_rt_lock);
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+       raw_spin_unlock(&efi_rt_lock);
+       __efi_fpsimd_end();
+       efi_virtmap_unload();
+}
 
 asmlinkage u64 *efi_rt_stack_top __ro_after_init;
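
With the pair now out of line, every runtime-service invocation is still
bracketed the same way; a hedged sketch modeled on the generic runtime
wrappers (the service and the efi.runtime pointer are assumptions here):

    /* Illustrative only: call the GetTime() runtime service. */
    static efi_status_t virt_get_time(efi_time_t *tm)
    {
            efi_status_t status;

            arch_efi_call_virt_setup();     /* map EFI, FPSIMD begin, lock */
            status = arch_efi_call_virt(efi.runtime, get_time, tm, NULL);
            arch_efi_call_virt_teardown();  /* unlock, FPSIMD end, unmap */

            return status;
    }
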
 
index 6b2e0c3..0fc9420 100644 (file)
@@ -355,6 +355,35 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
 }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
 
+/*
+ * As per the ABI, exit SME streaming mode and clear the SVE state not
+ * shared with FPSIMD on syscall entry.
+ */
+static inline void fp_user_discard(void)
+{
+       /*
+        * If SME is active then exit streaming mode.  If ZA is active
+        * then flush the SVE registers but leave userspace access to
+        * both SVE and SME enabled, otherwise disable SME for the
+        * task and fall through to disabling SVE too.  This means
+	 * that after a syscall we never have any streaming mode
+	 * register state to track; if this changes, the KVM code
+	 * will need updating.
+        */
+       if (system_supports_sme())
+               sme_smstop_sm();
+
+       if (!system_supports_sve())
+               return;
+
+       if (test_thread_flag(TIF_SVE)) {
+               unsigned int sve_vq_minus_one;
+
+               sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
+               sve_flush_live(true, sve_vq_minus_one);
+       }
+}
+
 UNHANDLED(el1t, 64, sync)
 UNHANDLED(el1t, 64, irq)
 UNHANDLED(el1t, 64, fiq)
@@ -644,6 +673,8 @@ static void noinstr el0_svc(struct pt_regs *regs)
 {
        enter_from_user_mode(regs);
        cortex_a76_erratum_1463225_svc_handler();
+       fp_user_discard();
+       local_daif_restore(DAIF_PROCCTX);
        do_el0_svc(regs);
        exit_to_user_mode(regs);
 }
@@ -783,6 +814,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs)
 {
        enter_from_user_mode(regs);
        cortex_a76_erratum_1463225_svc_handler();
+       local_daif_restore(DAIF_PROCCTX);
        do_el0_svc_compat(regs);
        exit_to_user_mode(regs);
 }
index a40e5e5..6ad61de 100644 (file)
@@ -986,9 +986,13 @@ SYM_CODE_START(__sdei_asm_handler)
 
        mov     x19, x1
 
-#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+       /* Store the registered-event for crash_smp_send_stop() */
        ldrb    w4, [x19, #SDEI_EVENT_PRIORITY]
-#endif
+       cbnz    w4, 1f
+       adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+       b       2f
+1:     adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2:     str     x19, [x5]
 
 #ifdef CONFIG_VMAP_STACK
        /*
@@ -1055,6 +1059,14 @@ SYM_CODE_START(__sdei_asm_handler)
 
        ldr_l   x2, sdei_exit_mode
 
+       /* Clear the registered-event seen by crash_smp_send_stop() */
+       ldrb    w3, [x4, #SDEI_EVENT_PRIORITY]
+       cbnz    w3, 1f
+       adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6
+       b       2f
+1:     adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6
+2:     str     xzr, [x5]
+
 alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
        sdei_handler_exit exit_mode=x2
 alternative_else_nop_endif
@@ -1065,4 +1077,15 @@ alternative_else_nop_endif
 #endif
 SYM_CODE_END(__sdei_asm_handler)
 NOKPROBE(__sdei_asm_handler)
+
+SYM_CODE_START(__sdei_handler_abort)
+       mov_q   x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME
+       adr     x1, 1f
+       ldr_l   x2, sdei_exit_mode
+       sdei_handler_exit exit_mode=x2
+       // exit the handler and jump to the next instruction.
+       // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx.
+1:     ret
+SYM_CODE_END(__sdei_handler_abort)
+NOKPROBE(__sdei_handler_abort)
 #endif /* CONFIG_ARM_SDE_INTERFACE */
index 7a1aeb9..91e44ac 100644 (file)
@@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task)
        void *sst = task->thread.sve_state;
        struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 
-       if (!system_supports_sve())
+       if (!system_supports_sve() && !system_supports_sme())
                return;
 
        vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task)
        unsigned int i;
        __uint128_t const *p;
 
-       if (!system_supports_sve())
+       if (!system_supports_sve() && !system_supports_sme())
                return;
 
        vl = thread_get_cur_vl(&task->thread);
@@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
        void *sst = task->thread.sve_state;
        struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
 
-       if (!test_tsk_thread_flag(task, TIF_SVE))
+       if (!test_tsk_thread_flag(task, TIF_SVE) &&
+           !thread_sm_enabled(&task->thread))
                return;
 
        vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -847,6 +848,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
 int vec_set_vector_length(struct task_struct *task, enum vec_type type,
                          unsigned long vl, unsigned long flags)
 {
+       bool free_sme = false;
+
        if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
                                     PR_SVE_SET_VL_ONEXEC))
                return -EINVAL;
@@ -897,24 +900,39 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
                task->thread.fp_type = FP_STATE_FPSIMD;
        }
 
-       if (system_supports_sme() && type == ARM64_VEC_SME) {
-               task->thread.svcr &= ~(SVCR_SM_MASK |
-                                      SVCR_ZA_MASK);
-               clear_thread_flag(TIF_SME);
+       if (system_supports_sme()) {
+               if (type == ARM64_VEC_SME ||
+                   !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
+                       /*
+                        * We are changing the SME VL or weren't using
+                        * SME anyway, discard the state and force a
+                        * reallocation.
+                        */
+                       task->thread.svcr &= ~(SVCR_SM_MASK |
+                                              SVCR_ZA_MASK);
+                       clear_tsk_thread_flag(task, TIF_SME);
+                       free_sme = true;
+               }
        }
 
        if (task == current)
                put_cpu_fpsimd_context();
 
+       task_set_vl(task, type, vl);
+
        /*
-        * Force reallocation of task SVE and SME state to the correct
-        * size on next use:
+	 * Free the changed states if they are not in use; SME will be
+	 * reallocated to the correct size on next use, and we
+	 * allocate SVE now in case it is needed for use in streaming
+	 * mode.
         */
-       sve_free(task);
-       if (system_supports_sme() && type == ARM64_VEC_SME)
-               sme_free(task);
+       if (system_supports_sve()) {
+               sve_free(task);
+               sve_alloc(task, true);
+       }
 
-       task_set_vl(task, type, vl);
+       if (free_sme)
+               sme_free(task);
 
 out:
        update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
@@ -1161,9 +1179,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
  */
 u64 read_zcr_features(void)
 {
-       u64 zcr;
-       unsigned int vq_max;
-
        /*
         * Set the maximum possible VL, and write zeroes to all other
         * bits to see if they stick.
@@ -1171,12 +1186,8 @@ u64 read_zcr_features(void)
        sve_kernel_enable(NULL);
        write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);
 
-       zcr = read_sysreg_s(SYS_ZCR_EL1);
-       zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
-       vq_max = sve_vq_from_vl(sve_get_vl());
-       zcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
-       return zcr;
+       /* Return LEN value that would be written to get the maximum VL */
+       return sve_vq_from_vl(sve_get_vl()) - 1;
 }
 
 void __init sve_setup(void)
@@ -1267,9 +1278,9 @@ void fpsimd_release_task(struct task_struct *dead_task)
  * the interest of testability and predictability, the architecture
  * guarantees that when ZA is enabled it will be zeroed.
  */
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
 {
-       if (task->thread.sme_state) {
+       if (task->thread.sme_state && flush) {
                memset(task->thread.sme_state, 0, sme_state_size(task));
                return;
        }
@@ -1331,9 +1342,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
  */
 u64 read_smcr_features(void)
 {
-       u64 smcr;
-       unsigned int vq_max;
-
        sme_kernel_enable(NULL);
 
        /*
@@ -1342,12 +1350,8 @@ u64 read_smcr_features(void)
        write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
                       SYS_SMCR_EL1);
 
-       smcr = read_sysreg_s(SYS_SMCR_EL1);
-       smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
-       vq_max = sve_vq_from_vl(sme_get_vl());
-       smcr |= vq_max - 1; /* set LEN field to maximum effective value */
-
-       return smcr;
+       /* Return LEN value that would be written to get the maximum VL */
+       return sve_vq_from_vl(sme_get_vl()) - 1;
 }
 
 void __init sme_setup(void)
@@ -1497,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
        }
 
        sve_alloc(current, false);
-       sme_alloc(current);
+       sme_alloc(current, true);
        if (!current->thread.sve_state || !current->thread.sme_state) {
                force_sig(SIGKILL);
                return;
@@ -1649,7 +1653,6 @@ void fpsimd_flush_thread(void)
 
                fpsimd_flush_thread_vl(ARM64_VEC_SME);
                current->thread.svcr = 0;
-               sme_smstop();
        }
 
        current->thread.fp_type = FP_STATE_FPSIMD;
index 757a0de..7b23699 100644 (file)
@@ -113,7 +113,7 @@ SYM_CODE_START(primary_entry)
         */
 #if VA_BITS > 48
        mrs_s   x0, SYS_ID_AA64MMFR2_EL1
-       tst     x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT
+       tst     x0, ID_AA64MMFR2_EL1_VARange_MASK
        mov     x0, #VA_BITS
        mov     x25, #VA_BITS_MIN
        csel    x25, x25, x0, eq
@@ -756,7 +756,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva)
        b.ne    2f
 
        mrs_s   x0, SYS_ID_AA64MMFR2_EL1
-       and     x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT)
+       and     x0, x0, ID_AA64MMFR2_EL1_VARange_MASK
        cbnz    x0, 2f
 
        update_early_cpu_boot_status \
index db2a186..3522563 100644 (file)
@@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr,
                perf_bp_event(bp, regs);
 
                /* Do we need to handle the stepping? */
-               if (is_default_overflow_handler(bp))
+               if (uses_default_overflow_handler(bp))
                        step = 1;
 unlock:
                rcu_read_unlock();
@@ -733,7 +733,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 static int watchpoint_report(struct perf_event *wp, unsigned long addr,
                             struct pt_regs *regs)
 {
-       int step = is_default_overflow_handler(wp);
+       int step = uses_default_overflow_handler(wp);
        struct arch_hw_breakpoint *info = counter_arch_bp(wp);
 
        info->trigger = addr;
index 2fe2491..aee12c7 100644 (file)
@@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
                if (!len)
                        return;
 
-               len = min(len, ARRAY_SIZE(buf) - 1);
-               strncpy(buf, cmdline, len);
-               buf[len] = 0;
+               len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
+               if (len == -E2BIG)
+                       len = ARRAY_SIZE(buf) - 1;
 
                if (strcmp(buf, "--") == 0)
                        return;
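
The replacement leans on strscpy()'s contract: unlike strncpy(), it
always NUL-terminates and returns -E2BIG rather than a length when the
source had to be truncated. A standalone illustration (the helper is
hypothetical):

    #include <linux/errno.h>
    #include <linux/string.h>

    static ssize_t bounded_copy(char *buf, size_t bufsz, const char *src)
    {
            ssize_t len = strscpy(buf, src, bufsz);

            if (len == -E2BIG)
                    len = bufsz - 1;    /* buf now holds bufsz - 1 chars + NUL */
            return len;
    }
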
index 2276689..f872c57 100644 (file)
@@ -11,8 +11,6 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/of_pci.h>
-#include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
 #include <linux/pci-ecam.h>
index d7f4f0d..20d7ef8 100644 (file)
@@ -881,10 +881,18 @@ static int sve_set_common(struct task_struct *target,
                        break;
                case ARM64_VEC_SME:
                        target->thread.svcr |= SVCR_SM_MASK;
+
+                       /*
+                        * Disable traps and ensure there is SME storage but
+                        * preserve any currently set values in ZA/ZT.
+                        */
+                       sme_alloc(target, false);
+                       set_tsk_thread_flag(target, TIF_SME);
                        break;
                default:
                        WARN_ON_ONCE(1);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto out;
                }
 
                /*
@@ -932,11 +940,13 @@ static int sve_set_common(struct task_struct *target,
        /*
         * Ensure target->thread.sve_state is up to date with target's
         * FPSIMD regs, so that a short copyin leaves trailing
-        * registers unmodified.  Always enable SVE even if going into
-        * streaming mode.
+	 * registers unmodified.  Only enable SVE if we are
+	 * configuring normal SVE; a system with streaming SVE may not
+	 * have normal SVE.
         */
        fpsimd_sync_to_sve(target);
-       set_tsk_thread_flag(target, TIF_SVE);
+       if (type == ARM64_VEC_SVE)
+               set_tsk_thread_flag(target, TIF_SVE);
        target->thread.fp_type = FP_STATE_SVE;
 
        BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
@@ -1098,7 +1108,7 @@ static int za_set(struct task_struct *target,
        }
 
        /* Allocate/reinit ZA storage */
-       sme_alloc(target);
+       sme_alloc(target, true);
        if (!target->thread.sme_state) {
                ret = -ENOMEM;
                goto out;
@@ -1168,8 +1178,13 @@ static int zt_set(struct task_struct *target,
        if (!system_supports_sme2())
                return -EINVAL;
 
+	/* Ensure SVE storage in case this is the first use of SME */
+       sve_alloc(target, false);
+       if (!target->thread.sve_state)
+               return -ENOMEM;
+
        if (!thread_za_enabled(&target->thread)) {
-               sme_alloc(target);
+               sme_alloc(target, true);
                if (!target->thread.sme_state)
                        return -ENOMEM;
        }
@@ -1177,8 +1192,12 @@ static int zt_set(struct task_struct *target,
        ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                 thread_zt_state(&target->thread),
                                 0, ZT_SIG_REG_BYTES);
-       if (ret == 0)
+       if (ret == 0) {
                target->thread.svcr |= SVCR_ZA_MASK;
+               set_tsk_thread_flag(target, TIF_SME);
+       }
+
+       fpsimd_flush_task_state(target);
 
        return ret;
 }
index 830be01..255d12f 100644 (file)
@@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
 DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);
 #endif
 
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event);
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event);
+
 static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu)
 {
        unsigned long *p;
index e304f7e..c7ebe74 100644 (file)
@@ -475,7 +475,7 @@ static int restore_za_context(struct user_ctxs *user)
        fpsimd_flush_task_state(current);
        /* From now, fpsimd_thread_switch() won't touch thread.sve_state */
 
-       sme_alloc(current);
+       sme_alloc(current, true);
        if (!current->thread.sme_state) {
                current->thread.svcr &= ~SVCR_ZA_MASK;
                clear_thread_flag(TIF_SME);
index edd6389..960b98b 100644 (file)
@@ -1044,10 +1044,8 @@ void crash_smp_send_stop(void)
         * If this cpu is the only one alive at this point in time, online or
         * not, there are no stop messages to be sent around, so just back out.
         */
-       if (num_other_online_cpus() == 0) {
-               sdei_mask_local_cpu();
-               return;
-       }
+       if (num_other_online_cpus() == 0)
+               goto skip_ipi;
 
        cpumask_copy(&mask, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), &mask);
@@ -1066,7 +1064,9 @@ void crash_smp_send_stop(void)
                pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
                        cpumask_pr_args(&mask));
 
+skip_ipi:
        sdei_mask_local_cpu();
+       sdei_handler_abort();
 }
 
 bool smp_crash_stop_failed(void)
index 5a668d7..9a70d97 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/randomize_kstack.h>
 #include <linux/syscalls.h>
 
-#include <asm/daifflags.h>
 #include <asm/debug-monitors.h>
 #include <asm/exception.h>
 #include <asm/fpsimd.h>
@@ -75,9 +74,6 @@ static inline bool has_syscall_work(unsigned long flags)
        return unlikely(flags & _TIF_SYSCALL_WORK);
 }
 
-int syscall_trace_enter(struct pt_regs *regs);
-void syscall_trace_exit(struct pt_regs *regs);
-
 static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
                           const syscall_fn_t syscall_table[])
 {
@@ -104,8 +100,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
         * (Similarly for HVC and SMC elsewhere.)
         */
 
-       local_daif_restore(DAIF_PROCCTX);
-
        if (flags & _TIF_MTE_ASYNC_FAULT) {
                /*
                 * Process the asynchronous tag check fault before the actual
@@ -156,38 +150,8 @@ trace_exit:
        syscall_trace_exit(regs);
 }
 
-/*
- * As per the ABI exit SME streaming mode and clear the SVE state not
- * shared with FPSIMD on syscall entry.
- */
-static inline void fp_user_discard(void)
-{
-       /*
-        * If SME is active then exit streaming mode.  If ZA is active
-        * then flush the SVE registers but leave userspace access to
-        * both SVE and SME enabled, otherwise disable SME for the
-        * task and fall through to disabling SVE too.  This means
-        * that after a syscall we never have any streaming mode
-        * register state to track, if this changes the KVM code will
-        * need updating.
-        */
-       if (system_supports_sme())
-               sme_smstop_sm();
-
-       if (!system_supports_sve())
-               return;
-
-       if (test_thread_flag(TIF_SVE)) {
-               unsigned int sve_vq_minus_one;
-
-               sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1;
-               sve_flush_live(true, sve_vq_minus_one);
-       }
-}
-
 void do_el0_svc(struct pt_regs *regs)
 {
-       fp_user_discard();
        el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table);
 }
 
index 6028f1f..45354f2 100644 (file)
@@ -50,9 +50,7 @@ SECTIONS
 
        . = ALIGN(4);
        .altinstructions : {
-               __alt_instructions = .;
                *(.altinstructions)
-               __alt_instructions_end = .;
        }
 
        .dynamic        : { *(.dynamic) }               :text   :dynamic
index 4236cf3..9941c5b 100644 (file)
@@ -6,6 +6,10 @@
  *
  */
 
+int __kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res);
+
 int __kernel_clock_gettime(clockid_t clock,
                           struct __kernel_timespec *ts)
 {
index 0696732..6dcdae4 100644 (file)
@@ -827,8 +827,8 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
        assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
        assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
 
-       /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
-       sysreg_clear_set(cntkctl_el1, clr, set);
+       /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
+       sysreg_clear_set(cnthctl_el2, clr, set);
 }
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -1563,7 +1563,7 @@ no_vgic:
 void kvm_timer_init_vhe(void)
 {
        if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
-               sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
+               sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
 }
 
 int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
index c2c1405..d1cb298 100644 (file)
@@ -53,11 +53,16 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 
-static bool vgic_present;
+static bool vgic_present, kvm_arm_initialised;
 
-static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
+bool is_kvm_arm_initialised(void)
+{
+       return kvm_arm_initialised;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
        return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -713,13 +718,15 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
         */
        preempt_disable();
        kvm_vgic_vmcr_sync(vcpu);
-       vgic_v4_put(vcpu, true);
+       vcpu_set_flag(vcpu, IN_WFI);
+       vgic_v4_put(vcpu);
        preempt_enable();
 
        kvm_vcpu_halt(vcpu);
        vcpu_clear_flag(vcpu, IN_WFIT);
 
        preempt_disable();
+       vcpu_clear_flag(vcpu, IN_WFI);
        vgic_v4_load(vcpu);
        preempt_enable();
 }
@@ -787,7 +794,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
                if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
                        /* The distributor enable bits were changed */
                        preempt_disable();
-                       vgic_v4_put(vcpu, false);
+                       vgic_v4_put(vcpu);
                        vgic_v4_load(vcpu);
                        preempt_enable();
                }
@@ -1857,45 +1864,49 @@ static void cpu_hyp_reinit(void)
        cpu_hyp_init_features();
 }
 
-static void _kvm_arch_hardware_enable(void *discard)
+static void cpu_hyp_init(void *discard)
 {
-       if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+       if (!__this_cpu_read(kvm_hyp_initialized)) {
                cpu_hyp_reinit();
-               __this_cpu_write(kvm_arm_hardware_enabled, 1);
+               __this_cpu_write(kvm_hyp_initialized, 1);
+       }
+}
+
+static void cpu_hyp_uninit(void *discard)
+{
+       if (__this_cpu_read(kvm_hyp_initialized)) {
+               cpu_hyp_reset();
+               __this_cpu_write(kvm_hyp_initialized, 0);
        }
 }
 
 int kvm_arch_hardware_enable(void)
 {
-       int was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
+       /*
+        * Most calls to this function are made with migration
+        * disabled, but not with preemption disabled. The former is
+        * enough to ensure correctness, but most of the helpers
+	 * expect the latter and will throw a tantrum otherwise.
+        */
+       preempt_disable();
 
-       _kvm_arch_hardware_enable(NULL);
+       cpu_hyp_init(NULL);
 
-       if (!was_enabled) {
-               kvm_vgic_cpu_up();
-               kvm_timer_cpu_up();
-       }
+       kvm_vgic_cpu_up();
+       kvm_timer_cpu_up();
 
-       return 0;
-}
+       preempt_enable();
 
-static void _kvm_arch_hardware_disable(void *discard)
-{
-       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-               cpu_hyp_reset();
-               __this_cpu_write(kvm_arm_hardware_enabled, 0);
-       }
+       return 0;
 }
 
 void kvm_arch_hardware_disable(void)
 {
-       if (__this_cpu_read(kvm_arm_hardware_enabled)) {
-               kvm_timer_cpu_down();
-               kvm_vgic_cpu_down();
-       }
+       kvm_timer_cpu_down();
+       kvm_vgic_cpu_down();
 
        if (!is_protected_kvm_enabled())
-               _kvm_arch_hardware_disable(NULL);
+               cpu_hyp_uninit(NULL);
 }
 
 #ifdef CONFIG_CPU_PM
@@ -1904,16 +1915,16 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                                    void *v)
 {
        /*
-        * kvm_arm_hardware_enabled is left with its old value over
+        * kvm_hyp_initialized is left with its old value over
         * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
         * re-enable hyp.
         */
        switch (cmd) {
        case CPU_PM_ENTER:
-               if (__this_cpu_read(kvm_arm_hardware_enabled))
+               if (__this_cpu_read(kvm_hyp_initialized))
                        /*
-                        * don't update kvm_arm_hardware_enabled here
-                        * so that the hardware will be re-enabled
+                        * don't update kvm_hyp_initialized here
+                        * so that the hyp will be re-enabled
                         * when we resume. See below.
                         */
                        cpu_hyp_reset();
@@ -1921,8 +1932,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
                return NOTIFY_OK;
        case CPU_PM_ENTER_FAILED:
        case CPU_PM_EXIT:
-               if (__this_cpu_read(kvm_arm_hardware_enabled))
-                       /* The hardware was enabled before suspend. */
+               if (__this_cpu_read(kvm_hyp_initialized))
+                       /* The hyp was enabled before suspend. */
                        cpu_hyp_reinit();
 
                return NOTIFY_OK;
@@ -2003,7 +2014,7 @@ static int __init init_subsystems(void)
        /*
         * Enable hardware so that subsystem initialisation can access EL2.
         */
-       on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
+       on_each_cpu(cpu_hyp_init, NULL, 1);
 
        /*
         * Register CPU lower-power notifier
@@ -2041,7 +2052,7 @@ out:
                hyp_cpu_pm_exit();
 
        if (err || !is_protected_kvm_enabled())
-               on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+               on_each_cpu(cpu_hyp_uninit, NULL, 1);
 
        return err;
 }
@@ -2079,7 +2090,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
         * The stub hypercalls are now disabled, so set our local flag to
         * prevent a later re-init attempt in kvm_arch_hardware_enable().
         */
-       __this_cpu_write(kvm_arm_hardware_enabled, 1);
+       __this_cpu_write(kvm_hyp_initialized, 1);
        preempt_enable();
 
        return ret;
@@ -2482,6 +2493,8 @@ static __init int kvm_arm_init(void)
        if (err)
                goto out_subs;
 
+       kvm_arm_initialised = true;
+
        return 0;
 
 out_subs:
index 8f3f93f..03f97d7 100644 (file)
@@ -154,6 +154,12 @@ SYM_CODE_END(\label)
        esb
        stp     x0, x1, [sp, #-16]!
 662:
+       /*
+        * spectre vectors __bp_harden_hyp_vecs generate br instructions at runtime
+        * that jump at offset 8 at __kvm_hyp_vector.
+        * As hyp .text is guarded section, it needs bti j.
+        */
+       bti j
        b       \target
 
 check_preamble_length 661b, 662b
@@ -165,6 +171,8 @@ check_preamble_length 661b, 662b
        nop
        stp     x0, x1, [sp, #-16]!
 662:
+       /* Check valid_vect */
+       bti j
        b       \target
 
 check_preamble_length 661b, 662b
index 4bddb85..34f222a 100644 (file)
@@ -457,6 +457,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
         */
        val &= ~(TCR_HD | TCR_HA);
        write_sysreg_el1(val, SYS_TCR);
+       __kvm_skip_instr(vcpu);
        return true;
 }
 
index 9ddc025..2250253 100644 (file)
@@ -25,7 +25,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o
         cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o
 hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
         ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
-hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 hyp-obj-y += $(lib-objs)
 
 ##
index 58dcd92..ab4f5d1 100644 (file)
@@ -705,7 +705,20 @@ int hyp_ffa_init(void *pages)
        if (res.a0 == FFA_RET_NOT_SUPPORTED)
                return 0;
 
-       if (res.a0 != FFA_VERSION_1_0)
+       /*
+        * Firmware returns the maximum supported version of the FF-A
+        * implementation. Check that the returned version is
+        * backwards-compatible with the hyp according to the rules in DEN0077A
+        * v1.1 REL0 13.2.1.
+        *
+        * Of course, things are never simple when dealing with firmware. v1.1
+        * broke ABI with v1.0 on several structures, which is itself
+        * incompatible with the aforementioned versioning scheme. The
+        * expectation is that v1.x implementations that do not support the v1.0
+        * ABI return NOT_SUPPORTED rather than a version number, according to
+        * DEN0077A v1.1 REL0 18.6.4.
+        */
+       if (FFA_MAJOR_VERSION(res.a0) != 1)
                return -EOPNOTSUPP;
 
        arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
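
The check works because FF-A packs the version as major/minor halves of
a single word, so the DEN0077A compatibility rule reduces to comparing
major numbers. A sketch of the decoding (field layout per the spec; the
macro names are stand-ins for the kernel's own):

    /* FF-A version word: bit 31 zero, bits [30:16] major, [15:0] minor. */
    #define FFA_MAJOR(v)    (((v) >> 16) & 0x7fff)
    #define FFA_MINOR(v)    ((v) & 0xffff)

    /* Same major version => backwards compatible (DEN0077A 13.2.1). */
    static bool hyp_ffa_compatible(unsigned long fw_version)
    {
            return FFA_MAJOR(fw_version) == 1;
    }
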
index c87c631..7693a67 100644 (file)
@@ -297,3 +297,13 @@ SYM_CODE_START(__kvm_hyp_host_forward_smc)
 
        ret
 SYM_CODE_END(__kvm_hyp_host_forward_smc)
+
+/*
+ * kvm_host_psci_cpu_entry is called through a br instruction, which requires
+ * a bti j landing pad: compilers (GCC and LLVM) don't emit bti j for external
+ * functions, only bti c.
+ */
+SYM_CODE_START(kvm_host_psci_cpu_entry)
+       bti j
+       b __kvm_host_psci_cpu_entry
+SYM_CODE_END(kvm_host_psci_cpu_entry)
index d68abd7..46a2d4f 100644 (file)
@@ -26,8 +26,9 @@ static inline __must_check bool nvhe_check_data_corruption(bool v)
 
 /* The predicates checked here are taken from lib/list_debug.c. */
 
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
-                     struct list_head *next)
+__list_valid_slowpath
+bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
+                               struct list_head *next)
 {
        if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) ||
            NVHE_CHECK_DATA_CORRUPTION(prev->next != next) ||
@@ -37,7 +38,8 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
        return true;
 }
 
-bool __list_del_entry_valid(struct list_head *entry)
+__list_valid_slowpath
+bool __list_del_entry_valid_or_report(struct list_head *entry)
 {
        struct list_head *prev, *next;
 
index 0850878..24543d2 100644 (file)
@@ -200,7 +200,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
                         __hyp_pa(init_params), 0);
 }
 
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
 {
        struct psci_boot_args *boot_args;
        struct kvm_cpu_context *host_ctxt;
index 0a62710..e89a231 100644 (file)
@@ -63,7 +63,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
                __activate_traps_fpsimd32(vcpu);
        }
 
-       write_sysreg(val, cptr_el2);
+       kvm_write_cptr_el2(val);
        write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
 
        if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
index aa740a9..f7a93ef 100644 (file)
@@ -1195,25 +1195,54 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
        return pte;
 }
 
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
+struct stage2_age_data {
+       bool    mkold;
+       bool    young;
+};
+
+static int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx,
+                            enum kvm_pgtable_walk_flags visit)
 {
-       kvm_pte_t pte = 0;
-       stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
-                                &pte, NULL, 0);
+       kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF;
+       struct stage2_age_data *data = ctx->arg;
+
+       if (!kvm_pte_valid(ctx->old) || new == ctx->old)
+               return 0;
+
+       data->young = true;
+
+       /*
+        * stage2_age_walker() is always called while holding the MMU lock for
+        * write, so this will always succeed. Nonetheless, this deliberately
+        * follows the race detection pattern of the other stage-2 walkers in
+        * case the locking mechanics of the MMU notifiers are ever changed.
+        */
+       if (data->mkold && !stage2_try_set_pte(ctx, new))
+               return -EAGAIN;
+
        /*
         * "But where's the TLBI?!", you scream.
         * "Over in the core code", I sigh.
         *
         * See the '->clear_flush_young()' callback on the KVM mmu notifier.
         */
-       return pte;
+       return 0;
 }
 
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+                                        u64 size, bool mkold)
 {
-       kvm_pte_t pte = 0;
-       stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0);
-       return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
+       struct stage2_age_data data = {
+               .mkold          = mkold,
+       };
+       struct kvm_pgtable_walker walker = {
+               .cb             = stage2_age_walker,
+               .arg            = &data,
+               .flags          = KVM_PGTABLE_WALK_LEAF,
+       };
+
+       WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
+       return data.young;
 }
 
 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
index 6db9ef2..d3b4fee 100644 (file)
@@ -1756,27 +1756,25 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
        u64 size = (range->end - range->start) << PAGE_SHIFT;
-       kvm_pte_t kpte;
-       pte_t pte;
 
        if (!kvm->arch.mmu.pgt)
                return false;
 
-       WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
-
-       kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
-                                       range->start << PAGE_SHIFT);
-       pte = __pte(kpte);
-       return pte_valid(pte) && pte_young(pte);
+       return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+                                                  range->start << PAGE_SHIFT,
+                                                  size, true);
 }
 
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
+       u64 size = (range->end - range->start) << PAGE_SHIFT;
+
        if (!kvm->arch.mmu.pgt)
                return false;
 
-       return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
-                                          range->start << PAGE_SHIFT);
+       return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+                                                  range->start << PAGE_SHIFT,
+                                                  size, false);
 }
 
 phys_addr_t kvm_mmu_get_httbr(void)
index 994a494..6ff3ec1 100644 (file)
@@ -244,7 +244,7 @@ static int __init finalize_pkvm(void)
 {
        int ret;
 
-       if (!is_protected_kvm_enabled())
+       if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
                return 0;
 
        /*
index bd34318..2ca2973 100644 (file)
@@ -986,7 +986,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 
        if (p->is_write) {
                kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
-               __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
                kvm_vcpu_pmu_restore_guest(vcpu);
        } else {
                p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
@@ -1115,18 +1114,19 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
        { SYS_DESC(SYS_DBGWCRn_EL1(n)),                                 \
          trap_wcr, reset_wcr, 0, 0,  get_wcr, set_wcr }
 
-#define PMU_SYS_REG(r)                                         \
-       SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
+#define PMU_SYS_REG(name)                                              \
+       SYS_DESC(SYS_##name), .reset = reset_pmu_reg,                   \
+       .visibility = pmu_visibility
 
 /* Macro to expand the PMEVCNTRn_EL0 register */
 #define PMU_PMEVCNTR_EL0(n)                                            \
-       { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)),                            \
+       { PMU_SYS_REG(PMEVCNTRn_EL0(n)),                                \
          .reset = reset_pmevcntr, .get_user = get_pmu_evcntr,          \
          .access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
 
 /* Macro to expand the PMEVTYPERn_EL0 register */
 #define PMU_PMEVTYPER_EL0(n)                                           \
-       { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)),                           \
+       { PMU_SYS_REG(PMEVTYPERn_EL0(n)),                               \
          .reset = reset_pmevtyper,                                     \
          .access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
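The reworked PMU_SYS_REG pastes the SYS_ prefix inside the macro, so only a
symbolic register name can be passed; a raw encoding no longer compiles. A
hedged sketch of the mechanism (the encoding value below is made up for the
demo):

#include <stdio.h>

#define SYS_PMCR_EL0		0xdce0		/* assumed value, demo only */
#define PMU_SYS_REG(name)	SYS_##name	/* paste forces symbolic names */

int main(void)
{
	printf("%#x\n", PMU_SYS_REG(PMCR_EL0));
	/* PMU_SYS_REG(0xdce0) would not compile: SYS_0xdce0 is undeclared */
	return 0;
}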
 
@@ -2115,9 +2115,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_PMBSR_EL1), undef_access },
        /* PMBIDR_EL1 is not trapped */
 
-       { PMU_SYS_REG(SYS_PMINTENSET_EL1),
+       { PMU_SYS_REG(PMINTENSET_EL1),
          .access = access_pminten, .reg = PMINTENSET_EL1 },
-       { PMU_SYS_REG(SYS_PMINTENCLR_EL1),
+       { PMU_SYS_REG(PMINTENCLR_EL1),
          .access = access_pminten, .reg = PMINTENSET_EL1 },
        { SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
 
@@ -2164,41 +2164,41 @@ static const struct sys_reg_desc sys_reg_descs[] = {
        { SYS_DESC(SYS_CTR_EL0), access_ctr },
        { SYS_DESC(SYS_SVCR), undef_access },
 
-       { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
+       { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
          .reset = reset_pmcr, .reg = PMCR_EL0 },
-       { PMU_SYS_REG(SYS_PMCNTENSET_EL0),
+       { PMU_SYS_REG(PMCNTENSET_EL0),
          .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
-       { PMU_SYS_REG(SYS_PMCNTENCLR_EL0),
+       { PMU_SYS_REG(PMCNTENCLR_EL0),
          .access = access_pmcnten, .reg = PMCNTENSET_EL0 },
-       { PMU_SYS_REG(SYS_PMOVSCLR_EL0),
+       { PMU_SYS_REG(PMOVSCLR_EL0),
          .access = access_pmovs, .reg = PMOVSSET_EL0 },
        /*
         * PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
         * (pointlessly) advertised in the past...
         */
-       { PMU_SYS_REG(SYS_PMSWINC_EL0),
+       { PMU_SYS_REG(PMSWINC_EL0),
          .get_user = get_raz_reg, .set_user = set_wi_reg,
          .access = access_pmswinc, .reset = NULL },
-       { PMU_SYS_REG(SYS_PMSELR_EL0),
+       { PMU_SYS_REG(PMSELR_EL0),
          .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
-       { PMU_SYS_REG(SYS_PMCEID0_EL0),
+       { PMU_SYS_REG(PMCEID0_EL0),
          .access = access_pmceid, .reset = NULL },
-       { PMU_SYS_REG(SYS_PMCEID1_EL0),
+       { PMU_SYS_REG(PMCEID1_EL0),
          .access = access_pmceid, .reset = NULL },
-       { PMU_SYS_REG(SYS_PMCCNTR_EL0),
+       { PMU_SYS_REG(PMCCNTR_EL0),
          .access = access_pmu_evcntr, .reset = reset_unknown,
          .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
-       { PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
+       { PMU_SYS_REG(PMXEVTYPER_EL0),
          .access = access_pmu_evtyper, .reset = NULL },
-       { PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
+       { PMU_SYS_REG(PMXEVCNTR_EL0),
          .access = access_pmu_evcntr, .reset = NULL },
        /*
         * PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero
         * in 32bit mode. Here we choose to reset it as zero for consistency.
         */
-       { PMU_SYS_REG(SYS_PMUSERENR_EL0), .access = access_pmuserenr,
+       { PMU_SYS_REG(PMUSERENR_EL0), .access = access_pmuserenr,
          .reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 },
-       { PMU_SYS_REG(SYS_PMOVSSET_EL0),
+       { PMU_SYS_REG(PMOVSSET_EL0),
          .access = access_pmovs, .reg = PMOVSSET_EL0 },
 
        { SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
@@ -2354,7 +2354,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
         * PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero
         * in 32bit mode. Here we choose to reset it as zero for consistency.
         */
-       { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
+       { PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper,
          .reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
 
        EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
index c3b8e13..3dfc8b8 100644 (file)
@@ -749,7 +749,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
 {
        struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
 
-       WARN_ON(vgic_v4_put(vcpu, false));
+       WARN_ON(vgic_v4_put(vcpu));
 
        vgic_v3_vmcr_sync(vcpu);
 
index c1c28fe..339a551 100644 (file)
@@ -336,14 +336,14 @@ void vgic_v4_teardown(struct kvm *kvm)
        its_vm->vpes = NULL;
 }
 
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
+int vgic_v4_put(struct kvm_vcpu *vcpu)
 {
        struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
 
        if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
                return 0;
 
-       return its_make_vpe_non_resident(vpe, need_db);
+       return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
 }
 
 int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -354,6 +354,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
        if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
                return 0;
 
+       if (vcpu_get_flag(vcpu, IN_WFI))
+               return 0;
+
        /*
         * Before making the VPE resident, make sure the redistributor
         * corresponding to our current CPU expects us here. See the
index d31c3a9..4fcb88a 100644 (file)
@@ -73,6 +73,33 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit;
 
 #define DEFAULT_CRASH_KERNEL_LOW_SIZE  (128UL << 20)
 
+/*
+ * To make optimal use of block mappings when laying out the linear
+ * mapping, round down the base of physical memory to a size that can
+ * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE
+ * (64k granule), or a multiple that can be mapped using contiguous bits
+ * in the page tables: 32 * PMD_SIZE (16k granule)
+ */
+#if defined(CONFIG_ARM64_4K_PAGES)
+#define ARM64_MEMSTART_SHIFT           PUD_SHIFT
+#elif defined(CONFIG_ARM64_16K_PAGES)
+#define ARM64_MEMSTART_SHIFT           CONT_PMD_SHIFT
+#else
+#define ARM64_MEMSTART_SHIFT           PMD_SHIFT
+#endif
+
+/*
+ * sparsemem vmemmap imposes an additional requirement on the alignment of
+ * memstart_addr, due to the fact that the base of the vmemmap region
+ * has a direct correspondence, and needs to appear sufficiently aligned
+ * in the virtual address space.
+ */
+#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
+#define ARM64_MEMSTART_ALIGN   (1UL << SECTION_SIZE_BITS)
+#else
+#define ARM64_MEMSTART_ALIGN   (1UL << ARM64_MEMSTART_SHIFT)
+#endif
+
 static int __init reserve_crashkernel_low(unsigned long long low_size)
 {
        unsigned long long low_base;
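For a feel of what the rounding above buys, a quick arithmetic check. The
shift values are the usual ones for each granule (PUD_SHIFT 30,
CONT_PMD_SHIFT 30, PMD_SHIFT 29) but should be treated as illustrative,
since the real values come from the configured page-table layout:

#include <stdio.h>

int main(void)
{
	const struct { const char *cfg; int shift; } m[] = {
		{ "4K  granule, PUD_SHIFT",      30 },	/* 1 GiB blocks    */
		{ "16K granule, CONT_PMD_SHIFT", 30 },	/* 32 x 32 MiB     */
		{ "64K granule, PMD_SHIFT",      29 },	/* 512 MiB blocks  */
	};

	for (int i = 0; i < 3; i++)
		printf("%-28s -> %4lu MiB alignment\n", m[i].cfg,
		       (1UL << m[i].shift) >> 20);
	return 0;
}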
index 2baeec4..14fdf64 100644 (file)
@@ -447,7 +447,7 @@ SYM_FUNC_START(__cpu_setup)
         * via capabilities.
         */
        mrs     x9, ID_AA64MMFR1_EL1
-       and     x9, x9, #0xf
+       and     x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK
        cbz     x9, 1f
        orr     tcr, tcr, #TCR_HA               // hardware Access flag update
 1:
index 4ea2eef..e9ad391 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
+#include <linux/kfence.h>
 
 static void *trans_alloc(struct trans_pgd_info *info)
 {
@@ -41,7 +42,8 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
                 * the temporary mappings we use during restore.
                 */
                set_pte(dst_ptep, pte_mkwrite(pte));
-       } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+       } else if ((debug_pagealloc_enabled() ||
+                  is_kfence_address((void *)addr)) && !pte_none(pte)) {
                /*
                 * debug_pagealloc will have removed the PTE_VALID bit if
                 * the page isn't in use by the resume kernel. It may have
index 145b540..ec21748 100644 (file)
@@ -322,7 +322,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
         *
         */
 
-       emit_bti(A64_BTI_C, ctx);
+       /* A bpf function may be invoked by 3 instruction types:
+        * 1. bl, attached via freplace to bpf prog via short jump
+        * 2. br, attached via freplace to bpf prog via long jump
+        * 3. blr, working as a function pointer, used by emit_call.
+        * So BTI_JC should be used here to support both br and blr.
+        */
+       emit_bti(A64_BTI_JC, ctx);
 
        emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
        emit(A64_NOP, ctx);
index 1ea4a3d..65866bf 100644 (file)
@@ -2017,7 +2017,7 @@ Field     0       SM
 EndSysreg
 
 SysregFields   HFGxTR_EL2
-Field  63      nAMIAIR2_EL1
+Field  63      nAMAIR2_EL1
 Field  62      nMAIR2_EL1
 Field  61      nS2POR_EL1
 Field  60      nPOR_EL1
@@ -2032,9 +2032,9 @@ Field     52      nGCS_EL0
 Res0   51
 Field  50      nACCDATA_EL1
 Field  49      ERXADDR_EL1
-Field  48      EXRPFGCDN_EL1
-Field  47      EXPFGCTL_EL1
-Field  46      EXPFGF_EL1
+Field  48      ERXPFGCDN_EL1
+Field  47      ERXPFGCTL_EL1
+Field  46      ERXPFGF_EL1
 Field  45      ERXMISCn_EL1
 Field  44      ERXSTATUS_EL1
 Field  43      ERXCTLR_EL1
@@ -2049,8 +2049,8 @@ Field     35      TPIDR_EL0
 Field  34      TPIDRRO_EL0
 Field  33      TPIDR_EL1
 Field  32      TCR_EL1
-Field  31      SCTXNUM_EL0
-Field  30      SCTXNUM_EL1
+Field  31      SCXTNUM_EL0
+Field  30      SCXTNUM_EL1
 Field  29      SCTLR_EL1
 Field  28      REVIDR_EL1
 Field  27      PAR_EL1
index 3e1337a..7cb96db 100644 (file)
@@ -77,7 +77,7 @@ CONFIG_EXT3_FS=y
 CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
index f8033ba..4581240 100644 (file)
@@ -146,7 +146,7 @@ CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
index ffebe6c..c9e8066 100644 (file)
@@ -127,7 +127,7 @@ CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
index 45f5d6e..d7d8fb5 100644 (file)
@@ -110,7 +110,7 @@ CONFIG_REISERFS_FS_XATTR=y
 CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
index 87927eb..58500a9 100644 (file)
@@ -11,7 +11,7 @@
 
 #ifdef __KERNEL__
 
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
 
 #include <linux/init.h>
 #include <linux/numa.h>
@@ -69,9 +69,9 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES];
 #endif
 
 static inline bool arch_has_acpi_pdc(void) { return true; }
-static inline void arch_acpi_set_pdc_bits(u32 *buf)
+static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 {
-       buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP;
+       *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SMP;
 }
 
 #ifdef CONFIG_ACPI_NUMA
index d1978e0..47e3801 100644 (file)
@@ -634,7 +634,6 @@ ia64_imva (void *addr)
 
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 #define PREFETCH_STRIDE                        L1_CACHE_BYTES
 
 static inline void
@@ -649,8 +648,6 @@ prefetchw (const void *x)
        ia64_lfetch_excl(ia64_lfhint_none, x);
 }
 
-#define spin_lock_prefetch(x)  prefetchw(x)
-
 extern unsigned long boot_option_idle_override;
 
 enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
index 6e948d0..eb561cc 100644 (file)
@@ -63,7 +63,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
        info.low_limit = addr;
        info.high_limit = TASK_SIZE;
        info.align_mask = align_mask;
-       info.align_offset = 0;
+       info.align_offset = pgoff << PAGE_SHIFT;
        return vm_unmapped_area(&info);
 }
 
index f8c74ff..83d8609 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index e55511a..465759f 100644 (file)
@@ -14,6 +14,7 @@ config LOONGARCH
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_FORTIFY_SOURCE
        select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+       select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PTE_SPECIAL
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
        select ARCH_INLINE_READ_LOCK if !PREEMPTION
@@ -661,5 +662,3 @@ source "kernel/power/Kconfig"
 source "drivers/acpi/Kconfig"
 
 endmenu
-
-source "drivers/firmware/Kconfig"
index 09ba338..ef87bab 100644 (file)
@@ -68,6 +68,8 @@ LDFLAGS_vmlinux                       += -static -n -nostdlib
 ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
 cflags-y                       += $(call cc-option,-mexplicit-relocs)
 KBUILD_CFLAGS_KERNEL           += $(call cc-option,-mdirect-extern-access)
+KBUILD_AFLAGS_MODULE           += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_CFLAGS_MODULE           += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
 else
 cflags-y                       += $(call cc-option,-mno-explicit-relocs)
 KBUILD_AFLAGS_KERNEL           += -Wa,-mla-global-with-pcrel
@@ -81,8 +83,8 @@ KBUILD_CFLAGS_KERNEL          += -fPIE
 LDFLAGS_vmlinux                        += -static -pie --no-dynamic-linker -z notext
 endif
 
-cflags-y += -ffreestanding
 cflags-y += $(call cc-option, -mno-check-zero-division)
+cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
 
 load-y         = 0x9000000000200000
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
@@ -111,7 +113,7 @@ KBUILD_CFLAGS += -isystem $(shell $(CC) -print-file-name=include)
 
 KBUILD_LDFLAGS += -m $(ld-emul)
 
-ifdef CONFIG_LOONGARCH
+ifdef need-compiler
 CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
        grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
        sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
index 6cd26dd..d64849b 100644 (file)
@@ -769,7 +769,7 @@ CONFIG_QUOTA=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=y
 CONFIG_OVERLAY_FS_INDEX=y
index 6b222f2..93783fa 100644 (file)
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += dma-contiguous.h
-generic-y += export.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
index e4193d6..c2d8962 100644 (file)
@@ -173,16 +173,30 @@ static inline void restore_fp(struct task_struct *tsk)
                _restore_fp(&tsk->thread.fpu);
 }
 
-static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
+static inline void save_fpu_regs(struct task_struct *tsk)
 {
+       unsigned int euen;
+
        if (tsk == current) {
                preempt_disable();
-               if (is_fpu_owner())
+
+               euen = csr_read32(LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+               if (euen & CSR_EUEN_LASXEN)
+                       _save_lasx(&current->thread.fpu);
+               else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+               if (euen & CSR_EUEN_LSXEN)
+                       _save_lsx(&current->thread.fpu);
+               else
+#endif
+               if (euen & CSR_EUEN_FPEN)
                        _save_fp(&current->thread.fpu);
+
                preempt_enable();
        }
-
-       return tsk->thread.fpu.fpr;
 }
 
 static inline int is_simd_owner(void)
@@ -218,15 +232,8 @@ static inline void restore_lsx(struct task_struct *t)
 
 static inline void init_lsx_upper(void)
 {
-       /*
-        * Check cpu_has_lsx only if it's a constant. This will allow the
-        * compiler to optimise out code for CPUs without LSX without adding
-        * an extra redundant check for CPUs with LSX.
-        */
-       if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
-               return;
-
-       _init_lsx_upper();
+       if (cpu_has_lsx)
+               _init_lsx_upper();
 }
 
 static inline void restore_lsx_upper(struct task_struct *t)
@@ -294,7 +301,7 @@ static inline void restore_lasx_upper(struct task_struct *t) {}
 
 static inline int thread_lsx_context_live(void)
 {
-       if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
+       if (!cpu_has_lsx)
                return 0;
 
        return test_thread_flag(TIF_LSX_CTX_LIVE);
@@ -302,7 +309,7 @@ static inline int thread_lsx_context_live(void)
 
 static inline int thread_lasx_context_live(void)
 {
-       if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
+       if (!cpu_has_lasx)
                return 0;
 
        return test_thread_flag(TIF_LASX_CTX_LIVE);
index 83e995b..c496758 100644 (file)
@@ -63,8 +63,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new)
 
 static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
 {
-       typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
-       return try_cmpxchg_local(&l->a.counter, __old, new);
+       return try_cmpxchg_local(&l->a.counter,
+                                (typeof(l->a.counter) *) old, new);
 }
 
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
index 35f0958..f3ddaed 100644 (file)
@@ -162,7 +162,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val
 #define instruction_pointer(regs) ((regs)->csr_era)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern void die(const char *, struct pt_regs *) __noreturn;
+extern void die(const char *str, struct pt_regs *regs);
 
 static inline void die_if_kernel(const char *str, struct pt_regs *regs)
 {
index 416b653..66ecb48 100644 (file)
@@ -98,8 +98,6 @@ static inline void __cpu_die(unsigned int cpu)
 {
        loongson_cpu_die(cpu);
 }
-
-extern void __noreturn play_dead(void);
 #endif
 
 #endif /* __ASM_SMP_H */
index f3df5f0..501094a 100644 (file)
@@ -6,12 +6,12 @@
  *
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/export.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 #include <asm/regdef.h>
index 021b59c..fc55c4d 100644 (file)
@@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct perf_event *bp,
                        write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
                } else {
                        ctrl = encode_ctrl_reg(info->ctrl);
-                       write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
-                                    1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
+                       write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
                }
                enable = csr_read64(LOONGARCH_CSR_CRMD);
                csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
index cb8e580..3015896 100644 (file)
@@ -5,7 +5,7 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 
-#include <asm/export.h>
+#include <linux/export.h>
 #include <asm/ftrace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
index e16ab0b..482aa55 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 
-#include <asm/export.h>
 #include <asm/ftrace.h>
 #include <asm/regdef.h>
 #include <asm/stackframe.h>
index 2e04eb0..4ee1e9d 100644 (file)
@@ -61,13 +61,6 @@ EXPORT_SYMBOL(__stack_chk_guard);
 unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
 EXPORT_SYMBOL(boot_option_idle_override);
 
-#ifdef CONFIG_HOTPLUG_CPU
-void __noreturn arch_cpu_idle_dead(void)
-{
-       play_dead();
-}
-#endif
-
 asmlinkage void ret_from_fork(void);
 asmlinkage void ret_from_kernel_thread(void);
 
index a0767c3..f72adbf 100644 (file)
@@ -147,6 +147,8 @@ static int fpr_get(struct task_struct *target,
 {
        int r;
 
+       save_fpu_regs(target);
+
        if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
                r = gfpr_get(target, &to);
        else
@@ -278,6 +280,8 @@ static int simd_get(struct task_struct *target,
 {
        const unsigned int wr_size = NUM_FPU_REGS * regset->size;
 
+       save_fpu_regs(target);
+
        if (!tsk_used_math(target)) {
                /* The task hasn't used FP or LSX, fill with 0xff */
                copy_pad_fprs(target, regset, &to, 0);
index 78a0035..9d830ab 100644 (file)
@@ -332,9 +332,25 @@ static void __init bootcmdline_init(char **cmdline_p)
                        strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
 
                strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+               goto out;
        }
 #endif
 
+       /*
+        * Append built-in command line to the bootloader command line if
+        * CONFIG_CMDLINE_EXTEND is enabled.
+        */
+       if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) && CONFIG_CMDLINE[0]) {
+               strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+               strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+       }
+
+       /*
+        * Use built-in command line if the bootloader command line is empty.
+        */
+       if (IS_ENABLED(CONFIG_CMDLINE_BOOTLOADER) && !boot_command_line[0])
+               strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+
 out:
        *cmdline_p = boot_command_line;
 }
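The two added branches give a simple precedence: with CONFIG_CMDLINE_EXTEND
the built-in line is appended after the bootloader's, and with
CONFIG_CMDLINE_BOOTLOADER the built-in line serves only as a fallback for an
empty bootloader line. A hedged userspace sketch of that logic (buffer sizes
and names are illustrative):

#include <stdio.h>
#include <string.h>

#define CMDLINE "console=ttyS0"	/* stand-in for CONFIG_CMDLINE */

static void build_cmdline(char *boot, size_t sz, int extend, int fallback)
{
	if (extend && CMDLINE[0]) {
		strncat(boot, " ", sz - strlen(boot) - 1);
		strncat(boot, CMDLINE, sz - strlen(boot) - 1);
	}
	if (fallback && !boot[0])
		snprintf(boot, sz, "%s", CMDLINE);
}

int main(void)
{
	char a[128] = "root=/dev/vda", b[128] = "";

	build_cmdline(a, sizeof(a), 1, 0);	/* CMDLINE_EXTEND case    */
	build_cmdline(b, sizeof(b), 0, 1);	/* CMDLINE_BOOTLOADER case */
	printf("extend:   %s\nfallback: %s\n", a, b);
	return 0;
}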
index 8ea1bbc..6667b0a 100644 (file)
@@ -317,7 +317,7 @@ void loongson_cpu_die(unsigned int cpu)
        mb();
 }
 
-void play_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
 {
        register uint64_t addr;
        register void (*init_fn)(void);
index 8fb5e7a..89699db 100644 (file)
@@ -383,16 +383,15 @@ void show_registers(struct pt_regs *regs)
 
 static DEFINE_RAW_SPINLOCK(die_lock);
 
-void __noreturn die(const char *str, struct pt_regs *regs)
+void die(const char *str, struct pt_regs *regs)
 {
+       int ret;
        static int die_counter;
-       int sig = SIGSEGV;
 
        oops_enter();
 
-       if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr,
-                      SIGSEGV) == NOTIFY_STOP)
-               sig = 0;
+       ret = notify_die(DIE_OOPS, str, regs, 0,
+                        current->thread.trap_nr, SIGSEGV);
 
        console_verbose();
        raw_spin_lock_irq(&die_lock);
@@ -405,6 +404,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
 
        oops_exit();
 
+       if (ret == NOTIFY_STOP)
+               return;
+
        if (regs && kexec_should_crash(current))
                crash_kexec(regs);
 
@@ -414,7 +416,7 @@ void __noreturn die(const char *str, struct pt_regs *regs)
        if (panic_on_oops)
                panic("Fatal exception");
 
-       make_task_dead(sig);
+       make_task_dead(SIGSEGV);
 }
 
 static inline void setup_vint_size(unsigned int size)
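Dropping __noreturn from die() is what lets a NOTIFY_STOP from the DIE_OOPS
chain (say, an attached debugger) resume execution instead of killing the
task. A minimal sketch of the new control flow, with the notifier chain
stubbed out (names are illustrative):

#include <stdio.h>

enum { NOTIFY_DONE, NOTIFY_STOP };

static int notify_die_stub(void)
{
	return NOTIFY_STOP;	/* pretend a debugger claimed the oops */
}

static void die(const char *str)
{
	int ret = notify_die_stub();

	printf("Oops: %s\n", str);
	if (ret == NOTIFY_STOP)
		return;		/* handled: the caller resumes */
	/* otherwise make_task_dead(SIGSEGV) would never return */
}

int main(void)
{
	die("example");
	puts("resumed after die()");
	return 0;
}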
index fd1d62b..0790ead 100644 (file)
@@ -3,12 +3,12 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
@@ -108,6 +108,7 @@ SYM_FUNC_START(__clear_user_fast)
        addi.d  a3, a2, -8
        bgeu    a0, a3, .Llt8
 15:    st.d    zero, a0, 0
+       addi.d  a0, a0, 8
 
 .Llt8:
 16:    st.d    zero, a2, -8
@@ -188,7 +189,7 @@ SYM_FUNC_START(__clear_user_fast)
        _asm_extable 13b, .L_fixup_handle_0
        _asm_extable 14b, .L_fixup_handle_1
        _asm_extable 15b, .L_fixup_handle_0
-       _asm_extable 16b, .L_fixup_handle_1
+       _asm_extable 16b, .L_fixup_handle_0
        _asm_extable 17b, .L_fixup_handle_s0
        _asm_extable 18b, .L_fixup_handle_s0
        _asm_extable 19b, .L_fixup_handle_s0
index b21f6d5..bfe3d27 100644 (file)
@@ -3,12 +3,12 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .irp to, 0, 1, 2, 3, 4, 5, 6, 7
@@ -136,6 +136,7 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt8
 30:    ld.d    t0, a1, 0
 31:    st.d    t0, a0, 0
+       addi.d  a0, a0, 8
 
 .Llt8:
 32:    ld.d    t0, a3, -8
@@ -246,7 +247,7 @@ SYM_FUNC_START(__copy_user_fast)
        _asm_extable 30b, .L_fixup_handle_0
        _asm_extable 31b, .L_fixup_handle_0
        _asm_extable 32b, .L_fixup_handle_0
-       _asm_extable 33b, .L_fixup_handle_1
+       _asm_extable 33b, .L_fixup_handle_0
        _asm_extable 34b, .L_fixup_handle_s0
        _asm_extable 35b, .L_fixup_handle_s0
        _asm_extable 36b, .L_fixup_handle_s0
index 39ce662..cc30b3b 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memcpy)
index 45b725b..7dc76d1 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 SYM_FUNC_START(memmove)
index b39c619..3f20f79 100644 (file)
@@ -3,11 +3,11 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <linux/export.h>
 #include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/cpu.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .macro fill_to_64 r0
index 9177fd6..185f82d 100644 (file)
@@ -9,7 +9,6 @@
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
 #include <asm/errno.h>
-#include <asm/export.h>
 #include <asm/regdef.h>
 
 .L_fixup_handle_unaligned:
index 4c874a7..7ad7655 100644 (file)
@@ -2,9 +2,9 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/page.h>
 #include <asm/regdef.h>
 
index 4ad7870..ca17dd3 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <asm/asm.h>
-#include <asm/export.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
index c335dc4..6858633 100644 (file)
@@ -150,7 +150,7 @@ static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm
                         * no need to call lu32id to do a new filled operation.
                         */
                        imm_51_31 = (imm >> 31) & 0x1fffff;
-                       if (imm_51_31 != 0 || imm_51_31 != 0x1fffff) {
+                       if (imm_51_31 != 0 && imm_51_31 != 0x1fffff) {
                                /* lu32id rd, imm_51_32 */
                                imm_51_32 = (imm >> 32) & 0xfffff;
                                emit_insn(ctx, lu32id, rd, imm_51_32);
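The one-character fix above is the classic always-true disjunction: a 21-bit
field cannot be both 0 and 0x1fffff at once, so "!= 0 || != 0x1fffff" held
for every input and lu32id was always emitted. A standalone check of the
corrected predicate (assumes arithmetic right shift on signed longs, as all
supported compilers provide):

#include <assert.h>

/* imm[51:31] all zeros or all ones means bits 32..51 are already the
 * sign extension of bit 31, so no lu32id is needed. */
static int needs_lu32id(long imm)
{
	unsigned int imm_51_31 = (imm >> 31) & 0x1fffff;

	return imm_51_31 != 0 && imm_51_31 != 0x1fffff;
}

int main(void)
{
	assert(!needs_lu32id(0));		/* small positive     */
	assert(!needs_lu32id(-1));		/* sign-extended      */
	assert(needs_lu32id(1L << 35));		/* upper bits in use  */
	return 0;
}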
index 4383ed8..6deb8fa 100644 (file)
@@ -591,7 +591,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -635,6 +634,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index ec0f9c9..802c161 100644 (file)
@@ -548,7 +548,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -591,6 +590,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 8656ae1..2cb3d75 100644 (file)
@@ -568,7 +568,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -612,6 +611,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 496fb6a..b13552c 100644 (file)
@@ -540,7 +540,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -583,6 +582,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 4add7ab..f88356c 100644 (file)
@@ -550,7 +550,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -593,6 +592,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 5845f1f..7c2ebb6 100644 (file)
@@ -570,7 +570,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -614,6 +613,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index bbb251b..d3b2729 100644 (file)
@@ -656,7 +656,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -700,6 +699,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 4f9cfc7..4529bc4 100644 (file)
@@ -539,7 +539,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -582,6 +581,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 67c42b4..3082403 100644 (file)
@@ -540,7 +540,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -583,6 +582,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 85f1951..3911211 100644 (file)
@@ -557,7 +557,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -601,6 +600,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index b1b15ac..991730c 100644 (file)
@@ -538,7 +538,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -580,6 +579,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 91d66c0..e80d750 100644 (file)
@@ -538,7 +538,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -581,6 +580,7 @@ CONFIG_TEST_SCANF=m
 CONFIG_TEST_BITMAP=m
 CONFIG_TEST_UUID=m
 CONFIG_TEST_XARRAY=m
+CONFIG_TEST_MAPLE_TREE=m
 CONFIG_TEST_RHASHTABLE=m
 CONFIG_TEST_IDA=m
 CONFIG_TEST_BITOPS=m
index 439395a..081922c 100644 (file)
@@ -499,13 +499,13 @@ in_ea:
        dbf     %d0,morein
        rts
 
-       .section .fixup,#alloc,#execinstr
+       .section .fixup,"ax"
        .even
 1:
        jbsr    fpsp040_die
        jbra    .Lnotkern
 
-       .section __ex_table,#alloc
+       .section __ex_table,"a"
        .align  4
 
        .long   in_ea,1b
index 7a0d6e4..89e2ec2 100644 (file)
@@ -379,11 +379,11 @@ _060_real_access:
 
 
| Exception handling for movs access to illegal memory
-       .section .fixup,#alloc,#execinstr
+       .section .fixup,"ax"
        .even
 1:     moveq           #-1,%d1
        rts
-.section __ex_table,#alloc
+.section __ex_table,"a"
        .align 4
        .long   dmrbuae,1b
        .long   dmrwuae,1b
index 1b72029..0dbf9c5 100644 (file)
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
-generic-y += export.h
 generic-y += extable.h
 generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
index 365f39f..df1f6b4 100644 (file)
@@ -31,6 +31,9 @@
        __rem;                                                  \
 })
 
+/* Defining this stops the unused generic helper function from being built. */
+#define __div64_32 __div64_32
+
 #endif /* CONFIG_CPU_HAS_NO_MULDIV64 */
 
 #endif /* _M68K_DIV64_H */
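The self-referential define is the usual feature-override idiom: the generic
divide code guards its fallback with #ifndef __div64_32, so defining the name
to itself suppresses that fallback without disturbing call sites. A toy
demonstration (the fallback here is a stand-in, not the real helper):

#include <stdio.h>

#define __div64_32 __div64_32	/* "this arch provides its own" marker */

#ifndef __div64_32
#error "the generic fallback would be compiled here"
#endif

int main(void)
{
	puts("generic __div64_32 fallback suppressed");
	return 0;
}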
index f0f5021..760cc13 100644 (file)
@@ -41,6 +41,7 @@ static inline char *strncpy(char *dest, const char *src, size_t n)
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *, const void *, __kernel_size_t);
 
+extern int memcmp(const void *, const void *, __kernel_size_t);
 #define memcmp(d, s, n) __builtin_memcmp(d, s, n)
 
 #define __HAVE_ARCH_MEMSET
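Pairing an extern prototype with the builtin macro is worth a sketch:
ordinary calls route through __builtin_memcmp, while the declaration keeps
non-call uses legal, such as taking the address or suppressing the macro
with parentheses. The intent attributed here is an assumption:

#include <stddef.h>
#include <stdio.h>

extern int memcmp(const void *, const void *, size_t);
#define memcmp(d, s, n) __builtin_memcmp(d, s, n)

int main(void)
{
	/* parenthesized name suppresses the function-like macro */
	int (*fn)(const void *, const void *, size_t) = (memcmp);

	printf("%d\n", memcmp("ab", "ab", 2));	/* macro -> builtin */
	printf("%d\n", fn("ab", "ac", 2));	/* real symbol call */
	return 0;
}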
index ab0f1e7..f766707 100644 (file)
@@ -26,7 +26,7 @@ ENTRY(relocate_new_kernel)
        lea %pc@(.Lcopy),%a4
 2:     addl #0x00000000,%a4            /* virt_to_phys() */
 
-       .section ".m68k_fixup","aw"
+       .section .m68k_fixup,"aw"
        .long M68K_FIXUP_MEMOFFSET, 2b+2
        .previous
 
@@ -49,7 +49,7 @@ ENTRY(relocate_new_kernel)
        lea %pc@(.Lcont040),%a4
 5:     addl #0x00000000,%a4            /* virt_to_phys() */
 
-       .section ".m68k_fixup","aw"
+       .section .m68k_fixup,"aw"
        .long M68K_FIXUP_MEMOFFSET, 5b+2
        .previous
 
index 4f50478..259ceb1 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index 3a2143f..62787b4 100644 (file)
@@ -33,7 +33,7 @@ General Public License for more details. */
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* These are predefined by new versions of GNU cpp.  */
 
index 1c96764..1bcb742 100644 (file)
@@ -33,7 +33,7 @@ General Public License for more details. */
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
 
-#include <asm/export.h>
+#include <linux/export.h>
 
 /* These are predefined by new versions of GNU cpp.  */
 
index 855675e..c285324 100644 (file)
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-#include <asm/export.h>
+#include <linux/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
index 78440ae..39ad705 100644 (file)
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-#include <asm/export.h>
+#include <linux/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
index b6fd11f..6640eaa 100644 (file)
@@ -32,7 +32,7 @@ General Public License for more details. */
    Some of this code comes from MINIX, via the folks at ericsson.
    D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992
 */
-#include <asm/export.h>
+#include <linux/export.h>
 /* These are predefined by new versions of GNU cpp.  */
 
 #ifndef __USER_LABEL_PREFIX__
index 858d22b..a3798c2 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index b0cbfa7..e463a9a 100644 (file)
@@ -153,7 +153,7 @@ CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 5458573..1843468 100644 (file)
@@ -178,7 +178,7 @@ CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_REISERFS_FS=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 897e555..44821f4 100644 (file)
@@ -245,7 +245,7 @@ CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 418a418..930c5f6 100644 (file)
@@ -95,7 +95,7 @@ CONFIG_EXT3_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 6b5d7e9..fdf3745 100644 (file)
@@ -76,7 +76,7 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 7e598d3..83d9a8f 100644 (file)
@@ -240,7 +240,7 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_BTRFS_FS=m
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=m
index 0ab029e..ec3ee8d 100644 (file)
@@ -296,7 +296,7 @@ CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_QUOTA=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 3087e64..1294263 100644 (file)
@@ -352,7 +352,7 @@ CONFIG_QUOTA=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_VIRTIO_FS=m
 CONFIG_FSCACHE=m
index b641721..935585d 100644 (file)
@@ -601,7 +601,7 @@ CONFIG_EXT3_FS=m
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
 CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 8397f28..0e494c2 100644 (file)
@@ -66,7 +66,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_FSCACHE=m
 CONFIG_ISO9660_FS=m
index 7475c2c..e0e312d 100644 (file)
@@ -317,7 +317,7 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
index 5daf6fe..e6ae3df 100644 (file)
@@ -101,8 +101,8 @@ static __inline__ long local_cmpxchg(local_t *l, long old, long new)
 
 static __inline__ bool local_try_cmpxchg(local_t *l, long *old, long new)
 {
-       typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
-       return try_cmpxchg_local(&l->a.counter, __old, new);
+       return try_cmpxchg_local(&l->a.counter,
+                                (typeof(l->a.counter) *) old, new);
 }
 
 #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n)))
index 9151dcd..af9cea2 100644 (file)
@@ -58,8 +58,6 @@
 
 #define cpu_has_rixi           (cpu_data[0].cputype != CPU_CAVIUM_OCTEON)
 
-#define ARCH_HAS_SPINLOCK_PREFETCH 1
-#define spin_lock_prefetch(x) prefetch(x)
 #define PREFETCH_STRIDE 128
 
 #ifdef __OCTEON__
index 1976317..152034b 100644 (file)
 449    n32     futex_waitv                     sys_futex_waitv
 450    n32     set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    n32     cachestat                       sys_cachestat
+452    n32     fchmodat2                       sys_fchmodat2
index cfda251..cb5e757 100644 (file)
 449    n64     futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    n64     cachestat                       sys_cachestat
+452    n64     fchmodat2                       sys_fchmodat2
index 7692234..1a64681 100644 (file)
 449    o32     futex_waitv                     sys_futex_waitv
 450    o32     set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    o32     cachestat                       sys_cachestat
+452    o32     fchmodat2                       sys_fchmodat2
index ca585e4..e7ffb58 100644 (file)
 
 struct sigcontext {
        struct user_regs_struct regs;  /* needs to be first */
-       struct __or1k_fpu_state fpu;
-       unsigned long oldmask;
+       union {
+               unsigned long fpcsr;
+               unsigned long oldmask;  /* unused */
+       };
 };
 
 #endif /* __ASM_OPENRISC_SIGCONTEXT_H */
index 4664a18..2e7257a 100644 (file)
@@ -50,7 +50,7 @@ static int restore_sigcontext(struct pt_regs *regs,
        err |= __copy_from_user(regs, sc->regs.gpr, 32 * sizeof(unsigned long));
        err |= __copy_from_user(&regs->pc, &sc->regs.pc, sizeof(unsigned long));
        err |= __copy_from_user(&regs->sr, &sc->regs.sr, sizeof(unsigned long));
-       err |= __copy_from_user(&regs->fpcsr, &sc->fpu.fpcsr, sizeof(unsigned long));
+       err |= __copy_from_user(&regs->fpcsr, &sc->fpcsr, sizeof(unsigned long));
 
        /* make sure the SM-bit is cleared so user-mode cannot fool us */
        regs->sr &= ~SPR_SR_SM;
@@ -113,7 +113,7 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
        err |= __copy_to_user(sc->regs.gpr, regs, 32 * sizeof(unsigned long));
        err |= __copy_to_user(&sc->regs.pc, &regs->pc, sizeof(unsigned long));
        err |= __copy_to_user(&sc->regs.sr, &regs->sr, sizeof(unsigned long));
-       err |= __copy_to_user(&sc->fpu.fpcsr, &regs->fpcsr, sizeof(unsigned long));
+       err |= __copy_to_user(&sc->fpcsr, &regs->fpcsr, sizeof(unsigned long));
 
        return err;
 }
index 1401e4c..bf2b21b 100644 (file)
@@ -2,7 +2,7 @@
 #
 config LIGHTWEIGHT_SPINLOCK_CHECK
        bool "Enable lightweight spinlock checks"
-       depends on SMP && !DEBUG_SPINLOCK
+       depends on DEBUG_KERNEL && SMP && !DEBUG_SPINLOCK
        default y
        help
          Add checks with low performance impact to the spinlock functions
index 7ee49f5..d389359 100644 (file)
@@ -117,7 +117,7 @@ char *strchr(const char *s, int c)
        return NULL;
 }
 
-int puts(const char *s)
+static int puts(const char *s)
 {
        const char *nuline = s;
 
@@ -172,7 +172,7 @@ static int print_num(unsigned long num, int base)
        return 0;
 }
 
-int printf(const char *fmt, ...)
+static int printf(const char *fmt, ...)
 {
        va_list args;
        int i = 0;
@@ -204,13 +204,13 @@ void abort(void)
 }
 
 #undef malloc
-void *malloc(size_t size)
+static void *malloc(size_t size)
 {
        return malloc_gzip(size);
 }
 
 #undef free
-void free(void *ptr)
+static void free(void *ptr)
 {
        return free_gzip(ptr);
 }
@@ -278,7 +278,7 @@ static void parse_elf(void *output)
        free(phdrs);
 }
 
-unsigned long decompress_kernel(unsigned int started_wide,
+asmlinkage unsigned long __visible decompress_kernel(unsigned int started_wide,
                unsigned int command_line,
                const unsigned int rd_start,
                const unsigned int rd_end)
index 9651f43..ee4febb 100644 (file)
@@ -237,7 +237,7 @@ CONFIG_EXT3_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_VFAT_FS=y
index 6758c03..f6ded71 100644 (file)
@@ -259,7 +259,7 @@ CONFIG_BTRFS_FS=m
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=y
 CONFIG_ISO9660_FS=y
index 9e8c101..582fb5d 100644 (file)
@@ -14,6 +14,8 @@
 #define dma_outb       outb
 #define dma_inb                inb
 
+extern unsigned long pcxl_dma_start;
+
 /*
 ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
 ** (or rather not merge) DMAs into manageable chunks.
index a7cf0d0..f1cc1ee 100644 (file)
@@ -12,6 +12,10 @@ extern void mcount(void);
 extern unsigned long sys_call_table[];
 
 extern unsigned long return_address(unsigned int);
+struct ftrace_regs;
+extern void ftrace_function_trampoline(unsigned long parent,
+               unsigned long self_addr, unsigned long org_sp_gr3,
+               struct ftrace_regs *fregs);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_caller(void);
index edfcb98..0b326e5 100644 (file)
@@ -7,8 +7,6 @@
 #include <asm/processor.h>
 #include <asm/spinlock_types.h>
 
-#define SPINLOCK_BREAK_INSN    0x0000c006      /* break 6,6 */
-
 static inline void arch_spin_val_check(int lock_val)
 {
        if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK))
index d659340..efd06a8 100644 (file)
@@ -4,6 +4,10 @@
 
 #define __ARCH_SPIN_LOCK_UNLOCKED_VAL  0x1a46
 
+#define SPINLOCK_BREAK_INSN    0x0000c006      /* break 6,6 */
+
+#ifndef __ASSEMBLY__
+
 typedef struct {
 #ifdef CONFIG_PA20
        volatile unsigned int slock;
@@ -27,6 +31,8 @@ typedef struct {
        volatile unsigned int   counter;
 } arch_rwlock_t;
 
+#endif /* __ASSEMBLY__ */
+
 #define __ARCH_RW_LOCK_UNLOCKED__       0x01000000
 #define __ARCH_RW_LOCK_UNLOCKED         { .lock_mutex = __ARCH_SPIN_LOCK_UNLOCKED, \
                                        .counter = __ARCH_RW_LOCK_UNLOCKED__ }
index 0e5ebfe..ae03b86 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/traps.h>
 #include <asm/thread_info.h>
 #include <asm/alternative.h>
+#include <asm/spinlock_types.h>
 
 #include <linux/linkage.h>
 #include <linux/pgtable.h>
        LDREG           0(\ptp),\pte
        bb,<,n          \pte,_PAGE_PRESENT_BIT,3f
        b               \fault
-       stw             \spc,0(\tmp)
+       stw             \tmp1,0(\tmp)
 99:    ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
 2:     LDREG           0(\ptp),\pte
        .endm
 
        /* Release page_table_lock without reloading lock address.
-          Note that the values in the register spc are limited to
-          NR_SPACE_IDS (262144). Thus, the stw instruction always
-          stores a nonzero value even when register spc is 64 bits.
           We use an ordered store to ensure all prior accesses are
           performed prior to releasing the lock. */
-       .macro          ptl_unlock0     spc,tmp
+       .macro          ptl_unlock0     spc,tmp,tmp2
 #ifdef CONFIG_TLB_PTLOCK
-98:    or,COND(=)      %r0,\spc,%r0
-       stw,ma          \spc,0(\tmp)
+98:    ldi             __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2
+       or,COND(=)      %r0,\spc,%r0
+       stw,ma          \tmp2,0(\tmp)
 99:    ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
        .endm
 
        /* Release page_table_lock. */
-       .macro          ptl_unlock1     spc,tmp
+       .macro          ptl_unlock1     spc,tmp,tmp2
 #ifdef CONFIG_TLB_PTLOCK
 98:    get_ptl         \tmp
-       ptl_unlock0     \spc,\tmp
+       ptl_unlock0     \spc,\tmp,\tmp2
 99:    ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
 #endif
        .endm
@@ -1125,7 +1124,7 @@ dtlb_miss_20w:
        
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1151,7 +1150,7 @@ nadtlb_miss_20w:
 
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1185,7 +1184,7 @@ dtlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1218,7 +1217,7 @@ nadtlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1247,7 +1246,7 @@ dtlb_miss_20:
 
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1275,7 +1274,7 @@ nadtlb_miss_20:
        
        idtlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1320,7 +1319,7 @@ itlb_miss_20w:
        
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1344,7 +1343,7 @@ naitlb_miss_20w:
 
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1378,7 +1377,7 @@ itlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1402,7 +1401,7 @@ naitlb_miss_11:
 
        mtsp            t1, %sr1        /* Restore sr1 */
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1432,7 +1431,7 @@ itlb_miss_20:
 
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1452,7 +1451,7 @@ naitlb_miss_20:
 
        iitlbt          pte,prot
 
-       ptl_unlock1     spc,t0
+       ptl_unlock1     spc,t0,t1
        rfir
        nop
 
@@ -1482,7 +1481,7 @@ dbit_trap_20w:
                
        idtlbt          pte,prot
 
-       ptl_unlock0     spc,t0
+       ptl_unlock0     spc,t0,t1
        rfir
        nop
 #else
@@ -1508,7 +1507,7 @@ dbit_trap_11:
 
        mtsp            t1, %sr1     /* Restore sr1 */
 
-       ptl_unlock0     spc,t0
+       ptl_unlock0     spc,t0,t1
        rfir
        nop
 
@@ -1528,7 +1527,7 @@ dbit_trap_20:
        
        idtlbt          pte,prot
 
-       ptl_unlock0     spc,t0
+       ptl_unlock0     spc,t0,t1
        rfir
        nop
 #endif
index 6d1c781..8f37e75 100644 (file)
@@ -74,8 +74,8 @@
 static DEFINE_SPINLOCK(pdc_lock);
 #endif
 
-unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
-unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
+static unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
+static unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
@@ -334,7 +334,7 @@ int __pdc_cpu_rendezvous(void)
 /**
  * pdc_cpu_rendezvous_lock - Lock PDC while transitioning to rendezvous state
  */
-void pdc_cpu_rendezvous_lock(void)
+void pdc_cpu_rendezvous_lock(void) __acquires(&pdc_lock)
 {
        spin_lock(&pdc_lock);
 }
@@ -342,7 +342,7 @@ void pdc_cpu_rendezvous_lock(void)
 /**
  * pdc_cpu_rendezvous_unlock - Unlock PDC after reaching rendezvous state
  */
-void pdc_cpu_rendezvous_unlock(void)
+void pdc_cpu_rendezvous_unlock(void) __releases(&pdc_lock)
 {
        spin_unlock(&pdc_lock);
 }
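
__acquires() and __releases() are sparse context annotations: they compile away in a normal build, but "make C=1" uses them to verify that a function returning with a lock held (or dropping one it did not take) is declared as doing so. A minimal sketch of how the pair balances, assuming nothing beyond linux/spinlock.h:

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    /* Returns with demo_lock held; sparse flags any caller that forgets
     * the matching release. No effect on generated code. */
    static void demo_lock_take(void) __acquires(&demo_lock)
    {
            spin_lock(&demo_lock);
    }

    static void demo_lock_drop(void) __releases(&demo_lock)
    {
            spin_unlock(&demo_lock);
    }
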
index 4d392e4..d1defb9 100644 (file)
@@ -53,7 +53,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
 
 static ftrace_func_t ftrace_func;
 
-void notrace __hot ftrace_function_trampoline(unsigned long parent,
+asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent,
                                unsigned long self_addr,
                                unsigned long org_sp_gr3,
                                struct ftrace_regs *fregs)
index 00297e8..6f0c92e 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/syscalls.h>
+#include <linux/libgcc.h>
 
 #include <linux/string.h>
 EXPORT_SYMBOL(memset);
@@ -92,12 +93,6 @@ EXPORT_SYMBOL($$divI_12);
 EXPORT_SYMBOL($$divI_14);
 EXPORT_SYMBOL($$divI_15);
 
-extern void __ashrdi3(void);
-extern void __ashldi3(void);
-extern void __lshrdi3(void);
-extern void __muldi3(void);
-extern void __ucmpdi2(void);
-
 EXPORT_SYMBOL(__ashrdi3);
 EXPORT_SYMBOL(__ashldi3);
 EXPORT_SYMBOL(__lshrdi3);
index d818ece..bf9f192 100644 (file)
@@ -39,7 +39,7 @@ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
 static unsigned long pcxl_used_bytes __read_mostly;
 static unsigned long pcxl_used_pages __read_mostly;
 
-extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */
+unsigned long pcxl_dma_start __ro_after_init; /* pcxl dma mapping area start */
 static DEFINE_SPINLOCK(pcxl_res_lock);
 static char    *pcxl_res_map;
 static int     pcxl_res_hint;
@@ -381,7 +381,7 @@ pcxl_dma_init(void)
        pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL,
                                            get_order(pcxl_res_size));
        memset(pcxl_res_map, 0, pcxl_res_size);
-       proc_gsc_root = proc_mkdir("gsc", NULL);
+       proc_gsc_root = proc_mkdir("bus/gsc", NULL);
        if (!proc_gsc_root)
                printk(KERN_WARNING
                        "pcxl_dma_init: Unable to create gsc /proc dir entry\n");
@@ -417,14 +417,6 @@ void *arch_dma_alloc(struct device *dev, size_t size,
        map_uncached_pages(vaddr, size, paddr);
        *dma_handle = (dma_addr_t) paddr;
 
-#if 0
-/* This probably isn't needed to support EISA cards.
-** ISA cards will certainly only support 24-bit DMA addressing.
-** Not clear if we can, want, or need to support ISA.
-*/
-       if (!dev || *dev->coherent_dma_mask < 0xffffffff)
-               gfp |= GFP_DMA;
-#endif
        return (void *)vaddr;
 }
 
index 0d24735..0f9b3b5 100644 (file)
@@ -354,10 +354,8 @@ static int __init pdt_initcall(void)
                return -ENODEV;
 
        kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd");
-       if (IS_ERR(kpdtd_task))
-               return PTR_ERR(kpdtd_task);
 
-       return 0;
+       return PTR_ERR_OR_ZERO(kpdtd_task);
 }
 
 late_initcall(pdt_initcall);
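
PTR_ERR_OR_ZERO() from linux/err.h is the idiomatic spelling of the removed branch: it returns the encoded errno when handed an error pointer and 0 otherwise. A self-contained sketch; demo_threadfn is an assumed placeholder, not a kernel symbol:

    #include <linux/err.h>
    #include <linux/kthread.h>

    static int demo_threadfn(void *data);  /* assumed, for illustration */

    static int __init demo_init(void)
    {
            struct task_struct *task = kthread_run(demo_threadfn, NULL, "demo");

            /* -errno if kthread_run() failed, 0 if the thread started */
            return PTR_ERR_OR_ZERO(task);
    }
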
index 90b04d8..b0f0816 100644 (file)
@@ -57,7 +57,7 @@ struct rdr_tbl_ent {
 static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
 static int perf_enabled __read_mostly;
 static DEFINE_SPINLOCK(perf_lock);
-struct parisc_device *cpu_device __read_mostly;
+static struct parisc_device *cpu_device __read_mostly;
 
 /* RDRs to write for PCX-W */
 static const int perf_rdrs_W[] =
index 00b0df9..762289b 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/pdc.h>
+#include <asm/smp.h>
 #include <asm/pdcpat.h>
 #include <asm/irq.h>           /* for struct irq_region */
 #include <asm/parisc-device.h>
index 573f830..211a4af 100644 (file)
 
 static char __initdata command_line[COMMAND_LINE_SIZE];
 
-/* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */
-struct proc_dir_entry * proc_runway_root __read_mostly = NULL;
-struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
-struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
-
 static void __init setup_cmdline(char **cmdline_p)
 {
        extern unsigned int boot_args[];
@@ -196,48 +191,6 @@ const struct seq_operations cpuinfo_op = {
        .show   = show_cpuinfo
 };
 
-static void __init parisc_proc_mkdir(void)
-{
-       /*
-       ** Can't call proc_mkdir() until after proc_root_init() has been
-       ** called by start_kernel(). In other words, this code can't
-       ** live in arch/.../setup.c because start_parisc() calls
-       ** start_kernel().
-       */
-       switch (boot_cpu_data.cpu_type) {
-       case pcxl:
-       case pcxl2:
-               if (NULL == proc_gsc_root)
-               {
-                       proc_gsc_root = proc_mkdir("bus/gsc", NULL);
-               }
-               break;
-        case pcxt_:
-        case pcxu:
-        case pcxu_:
-        case pcxw:
-        case pcxw_:
-        case pcxw2:
-                if (NULL == proc_runway_root)
-                {
-                        proc_runway_root = proc_mkdir("bus/runway", NULL);
-                }
-                break;
-       case mako:
-       case mako2:
-                if (NULL == proc_mckinley_root)
-                {
-                        proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
-                }
-                break;
-       default:
-               /* FIXME: this was added to prevent the compiler 
-                * complaining about missing pcx, pcxs and pcxt
-                * I'm assuming they have neither gsc nor runway */
-               break;
-       }
-}
-
 static struct resource central_bus = {
        .name   = "Central Bus",
        .start  = F_EXTEND(0xfff80000),
@@ -294,7 +247,6 @@ static int __init parisc_init(void)
 {
        u32 osid = (OS_ID_LINUX << 16);
 
-       parisc_proc_mkdir();
        parisc_init_resources();
        do_device_inventory();                  /* probe for hardware */
 
index f886ff0..e8d27de 100644 (file)
@@ -423,7 +423,7 @@ static void check_syscallno_in_delay_branch(struct pt_regs *regs)
        regs->gr[31] -= 8; /* delayed branching */
 
        /* Get assembler opcode of code in delay branch */
-       uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4);
+       uaddr = (u32 __user *) ((regs->gr[31] & ~3) + 4);
        err = get_user(opcode, uaddr);
        if (err)
                return;
index 1373e51..1f51aa9 100644 (file)
@@ -39,6 +39,7 @@ registers).
 #include <asm/assembly.h>
 #include <asm/processor.h>
 #include <asm/cache.h>
+#include <asm/spinlock_types.h>
 
 #include <linux/linkage.h>
 
@@ -66,6 +67,16 @@ registers).
        stw     \reg1, 0(%sr2,\reg2)
        .endm
 
+       /* raise exception if spinlock content is not zero or
+        * __ARCH_SPIN_LOCK_UNLOCKED_VAL */
+       .macro  spinlock_check spin_val,tmpreg
+#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmpreg
+       andcm,= \spin_val, \tmpreg, %r0
+       .word   SPINLOCK_BREAK_INSN
+#endif
+       .endm
+
        .text
 
        .import syscall_exit,code
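
spinlock_check is the assembly half of CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK: the old value LDCW returns must be either 0 (the lock was held) or the unlocked sentinel, so any stray bits indicate a corrupted lock word and the planted break instruction raises a trap. A rough C model of the andcm,= test; names are illustrative and __builtin_trap() stands in for SPINLOCK_BREAK_INSN:

    /* Trap iff spin_val has bits outside the sentinel, which is exactly
     * the condition under which "andcm,= \spin_val, \tmpreg, %r0" does
     * not nullify the following break word. */
    static inline void spinlock_check_sketch(unsigned int spin_val,
                                             unsigned int unlocked_val)
    {
            if (spin_val & ~unlocked_val)
                    __builtin_trap();
    }
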
@@ -508,7 +519,8 @@ lws_start:
 
 lws_exit_noerror:
        lws_pagefault_enable    %r1,%r21
-       stw,ma  %r20, 0(%sr2,%r20)
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+       stw,ma  %r21, 0(%sr2,%r20)
        ssm     PSW_SM_I, %r0
        b       lws_exit
        copy    %r0, %r21
@@ -521,7 +533,8 @@ lws_wouldblock:
 
 lws_pagefault:
        lws_pagefault_enable    %r1,%r21
-       stw,ma  %r20, 0(%sr2,%r20)
+       ldi     __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+       stw,ma  %r21, 0(%sr2,%r20)
        ssm     PSW_SM_I, %r0
        ldo     3(%r0),%r28
        b       lws_exit
@@ -619,6 +632,7 @@ lws_compare_and_swap:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -772,6 +786,7 @@ cas2_lock_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1001,6 +1016,7 @@ atomic_xchg_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1199,6 +1215,7 @@ atomic_store_start:
 
        /* Try to acquire the lock */
        LDCW    0(%sr2,%r20), %r28
+       spinlock_check  %r28, %r21
        comclr,<>       %r0, %r28, %r0
        b,n     lws_wouldblock
 
@@ -1330,7 +1347,7 @@ ENTRY(lws_lock_start)
        /* lws locks */
        .rept 256
        /* Keep locks aligned at 16-bytes */
-       .word 1
+       .word __ARCH_SPIN_LOCK_UNLOCKED_VAL
        .word 0 
        .word 0
        .word 0
index a0a9145..e97c175 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index 033b9e5..170d0dd 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/signal.h>
 #include <linux/ratelimit.h>
 #include <linux/uaccess.h>
+#include <linux/sysctl.h>
 #include <asm/unaligned.h>
 #include <asm/hardirq.h>
 #include <asm/traps.h>
@@ -337,7 +338,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
        : "r19", "r20", "r21", "r22", "r1" );
 #else
     {
-       unsigned long valh=(val>>32),vall=(val&0xffffffffl);
+       unsigned long valh = (val >> 32), vall = (val & 0xffffffffl);
        __asm__ __volatile__ (
 "      mtsp    %4, %%sr1\n"
 "      zdep    %2, 29, 2, %%r19\n"
@@ -473,7 +474,7 @@ void handle_unaligned(struct pt_regs *regs)
        case OPCODE_LDWA_I:
        case OPCODE_LDW_S:
        case OPCODE_LDWA_S:
-               ret = emulate_ldw(regs, R3(regs->iir),0);
+               ret = emulate_ldw(regs, R3(regs->iir), 0);
                break;
 
        case OPCODE_STH:
@@ -482,7 +483,7 @@ void handle_unaligned(struct pt_regs *regs)
 
        case OPCODE_STW:
        case OPCODE_STWA:
-               ret = emulate_stw(regs, R2(regs->iir),0);
+               ret = emulate_stw(regs, R2(regs->iir), 0);
                break;
 
 #ifdef CONFIG_64BIT
@@ -490,12 +491,12 @@ void handle_unaligned(struct pt_regs *regs)
        case OPCODE_LDDA_I:
        case OPCODE_LDD_S:
        case OPCODE_LDDA_S:
-               ret = emulate_ldd(regs, R3(regs->iir),0);
+               ret = emulate_ldd(regs, R3(regs->iir), 0);
                break;
 
        case OPCODE_STD:
        case OPCODE_STDA:
-               ret = emulate_std(regs, R2(regs->iir),0);
+               ret = emulate_std(regs, R2(regs->iir), 0);
                break;
 #endif
 
@@ -503,24 +504,24 @@ void handle_unaligned(struct pt_regs *regs)
        case OPCODE_FLDWS:
        case OPCODE_FLDWXR:
        case OPCODE_FLDWSR:
-               ret = emulate_ldw(regs,FR3(regs->iir),1);
+               ret = emulate_ldw(regs, FR3(regs->iir), 1);
                break;
 
        case OPCODE_FLDDX:
        case OPCODE_FLDDS:
-               ret = emulate_ldd(regs,R3(regs->iir),1);
+               ret = emulate_ldd(regs, R3(regs->iir), 1);
                break;
 
        case OPCODE_FSTWX:
        case OPCODE_FSTWS:
        case OPCODE_FSTWXR:
        case OPCODE_FSTWSR:
-               ret = emulate_stw(regs,FR3(regs->iir),1);
+               ret = emulate_stw(regs, FR3(regs->iir), 1);
                break;
 
        case OPCODE_FSTDX:
        case OPCODE_FSTDS:
-               ret = emulate_std(regs,R3(regs->iir),1);
+               ret = emulate_std(regs, R3(regs->iir), 1);
                break;
 
        case OPCODE_LDCD_I:
index 8e6014a..9d8b4db 100644 (file)
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/module.h>
+#include <linux/libgcc.h>
 
 union ull_union {
        unsigned long long ull;
@@ -9,7 +10,7 @@ union ull_union {
        } ui;
 };
 
-int __ucmpdi2(unsigned long long a, unsigned long long b)
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
 {
        union ull_union au = {.ull = a};
        union ull_union bu = {.ull = b};
index a4c7c76..2fe5b44 100644 (file)
@@ -192,31 +192,31 @@ int fixup_exception(struct pt_regs *regs)
  * For implementation see handle_interruption() in traps.c
  */
 static const char * const trap_description[] = {
-       [1] "High-priority machine check (HPMC)",
-       [2] "Power failure interrupt",
-       [3] "Recovery counter trap",
-       [5] "Low-priority machine check",
-       [6] "Instruction TLB miss fault",
-       [7] "Instruction access rights / protection trap",
-       [8] "Illegal instruction trap",
-       [9] "Break instruction trap",
-       [10] "Privileged operation trap",
-       [11] "Privileged register trap",
-       [12] "Overflow trap",
-       [13] "Conditional trap",
-       [14] "FP Assist Exception trap",
-       [15] "Data TLB miss fault",
-       [16] "Non-access ITLB miss fault",
-       [17] "Non-access DTLB miss fault",
-       [18] "Data memory protection/unaligned access trap",
-       [19] "Data memory break trap",
-       [20] "TLB dirty bit trap",
-       [21] "Page reference trap",
-       [22] "Assist emulation trap",
-       [25] "Taken branch trap",
-       [26] "Data memory access rights trap",
-       [27] "Data memory protection ID trap",
-       [28] "Unaligned data reference trap",
+       [1] =   "High-priority machine check (HPMC)",
+       [2] =   "Power failure interrupt",
+       [3] =   "Recovery counter trap",
+       [5] =   "Low-priority machine check",
+       [6] =   "Instruction TLB miss fault",
+       [7] =   "Instruction access rights / protection trap",
+       [8] =   "Illegal instruction trap",
+       [9] =   "Break instruction trap",
+       [10] =  "Privileged operation trap",
+       [11] =  "Privileged register trap",
+       [12] =  "Overflow trap",
+       [13] =  "Conditional trap",
+       [14] =  "FP Assist Exception trap",
+       [15] =  "Data TLB miss fault",
+       [16] =  "Non-access ITLB miss fault",
+       [17] =  "Non-access DTLB miss fault",
+       [18] =  "Data memory protection/unaligned access trap",
+       [19] =  "Data memory break trap",
+       [20] =  "TLB dirty bit trap",
+       [21] =  "Page reference trap",
+       [22] =  "Assist emulation trap",
+       [25] =  "Taken branch trap",
+       [26] =  "Data memory access rights trap",
+       [27] =  "Data memory protection ID trap",
+       [28] =  "Unaligned data reference trap",
 };
 
 const char *trap_name(unsigned long code)
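
The old form ([index] "string", with no =) is a pre-C99 GCC extension; the replacement uses standard C99 designated initializers, which clang and current GCC expect. Skipped indices stay NULL, which a bounds-checked lookup can rely on. The same pattern in miniature:

    #include <linux/kernel.h>       /* ARRAY_SIZE() */

    static const char * const demo_names[] = {
            [1] = "one",
            [2] = "two",
            [5] = "five",           /* [0], [3] and [4] remain NULL */
    };

    static const char *demo_name(unsigned long code)
    {
            if (code < ARRAY_SIZE(demo_names) && demo_names[code])
                    return demo_names[code];
            return "unknown";
    }
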
index cc15d73..ae3493d 100644 (file)
@@ -19,9 +19,6 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
        pmd_t *pmd = pmd_offset(pud, vaddr);
        pte_t *pte;
 
-       if (pmd_none(*pmd))
-               pte = pte_alloc_kernel(pmd, vaddr);
-
        pte = pte_offset_kernel(pmd, vaddr);
        set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX));
        flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
index 406c52f..a088c24 100644 (file)
@@ -523,10 +523,6 @@ void mark_rodata_ro(void)
 void *parisc_vmalloc_start __ro_after_init;
 EXPORT_SYMBOL(parisc_vmalloc_start);
 
-#ifdef CONFIG_PA11
-unsigned long pcxl_dma_start __ro_after_init;
-#endif
-
 void __init mem_init(void)
 {
        /* Do sanity checks on IPC (compat) structures */
@@ -669,6 +665,39 @@ static void __init gateway_init(void)
                  PAGE_SIZE, PAGE_GATEWAY, 1);
 }
 
+static void __init fixmap_init(void)
+{
+       unsigned long addr = FIXMAP_START;
+       unsigned long end = FIXMAP_START + FIXMAP_SIZE;
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
+       pmd_t *pmd;
+
+       BUILD_BUG_ON(FIXMAP_SIZE > PMD_SIZE);
+
+#if CONFIG_PGTABLE_LEVELS == 3
+       if (pud_none(*pud)) {
+               pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER,
+                                    PAGE_SIZE << PMD_TABLE_ORDER);
+               if (!pmd)
+                       panic("fixmap: pmd allocation failed.\n");
+               pud_populate(NULL, pud, pmd);
+       }
+#endif
+
+       pmd = pmd_offset(pud, addr);
+       do {
+               pte_t *pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pte)
+                       panic("fixmap: pte allocation failed.\n");
+
+               pmd_populate_kernel(&init_mm, pmd, pte);
+
+               addr += PAGE_SIZE;
+       } while (addr < end);
+}
+
 static void __init parisc_bootmem_free(void)
 {
        unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
@@ -683,6 +712,7 @@ void __init paging_init(void)
        setup_bootmem();
        pagetable_init();
        gateway_init();
+       fixmap_init();
        flush_cache_all_local(); /* start with known state */
        flush_tlb_all_local(NULL);
 
index 345ff0b..d7ee1f4 100644 (file)
@@ -27,7 +27,7 @@
  */
 void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
 {
-       void __iomem *addr;
+       uintptr_t addr;
        struct vm_struct *area;
        unsigned long offset, last_addr;
        pgprot_t pgprot;
@@ -79,10 +79,9 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
        if (!area)
                return NULL;
 
-       addr = (void __iomem *) area->addr;
-       if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
-                              phys_addr, pgprot)) {
-               vunmap(addr);
+       addr = (uintptr_t) area->addr;
+       if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
+               vunmap(area->addr);
                return NULL;
        }
 
index ef09786..51499ee 100644 (file)
@@ -79,7 +79,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_REISERFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
index ecbcc85..e708049 100644 (file)
@@ -50,7 +50,7 @@ CONFIG_DRM=m
 CONFIG_SOUND=m
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=m
index 06391cc..53f43a3 100644 (file)
@@ -172,7 +172,7 @@ CONFIG_EDAC_CELL=y
 CONFIG_UIO=m
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
index b9dfa3a..0d8d3f4 100644 (file)
@@ -47,7 +47,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y
 # CONFIG_USB_SUPPORT is not set
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
index ce220ce..2101bfe 100644 (file)
@@ -60,7 +60,7 @@ CONFIG_USB_FSL_USB2=y
 CONFIG_USB_G_SERIAL=y
 CONFIG_UIO=y
 CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_JFFS2_FS=y
index 96aa535..6199394 100644 (file)
@@ -143,7 +143,7 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_UDF_FS=y
 CONFIG_MSDOS_FS=y
index 019163c..05ed585 100644 (file)
@@ -254,7 +254,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
index e02ab94..ee84ade 100644 (file)
@@ -270,7 +270,7 @@ CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
 CONFIG_FANOTIFY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_ISO9660_FS=y
index 268fa36..c0f4bbc 100644 (file)
@@ -327,7 +327,7 @@ CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_NILFS2_FS=m
 CONFIG_FS_DAX=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_OVERLAY_FS=m
 CONFIG_ISO9660_FS=y
index 776c329..624c371 100644 (file)
@@ -185,7 +185,7 @@ CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=m
 CONFIG_XFS_POSIX_ACL=y
 CONFIG_FS_DAX=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_UDF_FS=m
 CONFIG_MSDOS_FS=y
index f21170b..1034aea 100644 (file)
@@ -969,7 +969,7 @@ CONFIG_XFS_POSIX_ACL=y
 CONFIG_GFS2_FS=m
 CONFIG_FS_DAX=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
index ea3ee07..1ea732c 100644 (file)
@@ -129,7 +129,7 @@ CONFIG_EXT2_FS=m
 CONFIG_EXT4_FS=y
 CONFIG_QUOTA=y
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_UDF_FS=m
index b6ac4f8..6472b08 100644 (file)
@@ -136,12 +136,6 @@ static inline int hash__pmd_trans_huge(pmd_t pmd)
        return 0;
 }
 
-static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-       BUG();
-       return 0;
-}
-
 static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
 {
        BUG();
index 338e62f..0bf6fd0 100644 (file)
@@ -263,11 +263,6 @@ static inline int hash__pmd_trans_huge(pmd_t pmd)
                  (_PAGE_PTE | H_PAGE_THP_HUGE));
 }
 
-static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-       return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
-}
-
 static inline pmd_t hash__pmd_mkhuge(pmd_t pmd)
 {
        return __pmd(pmd_val(pmd) | (_PAGE_PTE | H_PAGE_THP_HUGE));
index 17e7a77..d4a19e6 100644 (file)
@@ -132,6 +132,11 @@ static inline int get_region_id(unsigned long ea)
        return region_id;
 }
 
+static inline int hash__pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+       return (((pmd_raw(pmd_a) ^ pmd_raw(pmd_b)) & ~cpu_to_be64(_PAGE_HPTEFLAGS)) == 0);
+}
+
 #define        hash__pmd_bad(pmd)              (pmd_val(pmd) & H_PMD_BAD_BITS)
 #define        hash__pud_bad(pud)              (pud_val(pud) & H_PUD_BAD_BITS)
 static inline int hash__p4d_bad(p4d_t p4d)
index ef42adb..00c6b0b 100644 (file)
@@ -4,14 +4,13 @@
 #ifdef __KERNEL__
 
 #include <asm/asm-compat.h>
-#include <asm/extable.h>
 
 #ifdef CONFIG_BUG
 
 #ifdef __ASSEMBLY__
 #include <asm/asm-offsets.h>
 #ifdef CONFIG_DEBUG_BUGVERBOSE
-.macro __EMIT_BUG_ENTRY addr,file,line,flags
+.macro EMIT_BUG_ENTRY addr,file,line,flags
         .section __bug_table,"aw"
 5001:   .4byte \addr - .
         .4byte 5002f - .
@@ -23,7 +22,7 @@
         .previous
 .endm
 #else
-.macro __EMIT_BUG_ENTRY addr,file,line,flags
+.macro EMIT_BUG_ENTRY addr,file,line,flags
         .section __bug_table,"aw"
 5001:   .4byte \addr - .
         .short \flags
 .endm
 #endif /* verbose */
 
-.macro EMIT_WARN_ENTRY addr,file,line,flags
-       EX_TABLE(\addr,\addr+4)
-       __EMIT_BUG_ENTRY \addr,\file,\line,\flags
-.endm
-
-.macro EMIT_BUG_ENTRY addr,file,line,flags
-       .if \flags & 1 /* BUGFLAG_WARNING */
-       .err /* Use EMIT_WARN_ENTRY for warnings */
-       .endif
-       __EMIT_BUG_ENTRY \addr,\file,\line,\flags
-.endm
-
 #else /* !__ASSEMBLY__ */
 /* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
    sizeof(struct bug_entry), respectively */
                  "i" (sizeof(struct bug_entry)),       \
                  ##__VA_ARGS__)
 
-#define WARN_ENTRY(insn, flags, label, ...)            \
-       asm_volatile_goto(                              \
-               "1:     " insn "\n"                     \
-               EX_TABLE(1b, %l[label])                 \
-               _EMIT_BUG_ENTRY                         \
-               : : "i" (__FILE__), "i" (__LINE__),     \
-                 "i" (flags),                          \
-                 "i" (sizeof(struct bug_entry)),       \
-                 ##__VA_ARGS__ : : label)
-
 /*
  * BUG_ON() and WARN_ON() do their best to cooperate with compile-time
  * optimisations. However depending on the complexity of the condition
 } while (0)
 #define HAVE_ARCH_BUG
 
-#define __WARN_FLAGS(flags) do {                               \
-       __label__ __label_warn_on;                              \
-                                                               \
-       WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \
-       barrier_before_unreachable();                           \
-       __builtin_unreachable();                                \
-                                                               \
-__label_warn_on:                                               \
-       break;                                                  \
-} while (0)
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
 
 #ifdef CONFIG_PPC64
 #define BUG_ON(x) do {                                         \
@@ -117,25 +85,15 @@ __label_warn_on:                                           \
 } while (0)
 
 #define WARN_ON(x) ({                                          \
-       bool __ret_warn_on = false;                             \
-       do {                                                    \
-               if (__builtin_constant_p((x))) {                \
-                       if (!(x))                               \
-                               break;                          \
+       int __ret_warn_on = !!(x);                              \
+       if (__builtin_constant_p(__ret_warn_on)) {              \
+               if (__ret_warn_on)                              \
                        __WARN();                               \
-                       __ret_warn_on = true;                   \
-               } else {                                        \
-                       __label__ __label_warn_on;              \
-                                                               \
-                       WARN_ENTRY(PPC_TLNEI " %4, 0",          \
-                                  BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
-                                  __label_warn_on,             \
-                                  "r" ((__force long)(x)));    \
-                       break;                                  \
-__label_warn_on:                                               \
-                       __ret_warn_on = true;                   \
-               }                                               \
-       } while (0);                                            \
+       } else {                                                \
+               BUG_ENTRY(PPC_TLNEI " %4, 0",                   \
+                         BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN),  \
+                         "r" (__ret_warn_on)); \
+       }                                                       \
        unlikely(__ret_warn_on);                                \
 })
 
@@ -148,14 +106,13 @@ __label_warn_on:                                          \
 #ifdef __ASSEMBLY__
 .macro EMIT_BUG_ENTRY addr,file,line,flags
 .endm
-.macro EMIT_WARN_ENTRY addr,file,line,flags
-.endm
 #else /* !__ASSEMBLY__ */
 #define _EMIT_BUG_ENTRY
-#define _EMIT_WARN_ENTRY
 #endif
 #endif /* CONFIG_BUG */
 
+#define EMIT_WARN_ENTRY EMIT_BUG_ENTRY
+
 #include <asm-generic/bug.h>
 
 #ifndef __ASSEMBLY__
index a26ca09..79f1c48 100644 (file)
 
 /*
  * This is used to ensure we don't load something for the wrong architecture.
- * 64le only supports ELFv2 64-bit binaries (64be supports v1 and v2).
  */
-#if defined(CONFIG_PPC64) && defined(CONFIG_CPU_LITTLE_ENDIAN)
-#define elf_check_arch(x) (((x)->e_machine == ELF_ARCH) && \
-                          (((x)->e_flags & 0x3) == 0x2))
-#else
 #define elf_check_arch(x) ((x)->e_machine == ELF_ARCH)
-#endif
 #define compat_elf_check_arch(x)       ((x)->e_machine == EM_PPC)
 
 #define CORE_DUMP_USE_REGSET
index 8a6754f..a6c7069 100644 (file)
@@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p,
  */
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 static inline void prefetch(const void *x)
 {
@@ -411,8 +410,6 @@ static inline void prefetchw(const void *x)
        __asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
 }
 
-#define spin_lock_prefetch(x)  prefetchw(x)
-
 /* asm stubs */
 extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
 extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
index bc5d39a..bf5dde1 100644 (file)
@@ -183,13 +183,9 @@ static inline bool test_thread_local_flags(unsigned int flags)
 #define clear_tsk_compat_task(tsk) do { } while (0)
 #endif
 
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_CPU_BIG_ENDIAN
+#if defined(CONFIG_PPC64)
 #define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
 #else
-#define is_elf2_task() (1)
-#endif
-#else
 #define is_elf2_task() (0)
 #endif
 
index 46c31fb..30a12d2 100644 (file)
@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
        return leading_zero_bits >> 3;
 }
 
-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
 {
        unsigned long rhs = val | c->low_bits;
        *data = rhs;
index 3f86091..7ab4c8c 100644 (file)
@@ -5,6 +5,7 @@
  *  Copyright (C) 2007 Ben. Herrenschmidt (benh@kernel.crashing.org), IBM Corp.
  */
 
+#include <linux/linkage.h>
 #include <linux/threads.h>
 #include <asm/reg.h>
 #include <asm/page.h>
@@ -66,7 +67,7 @@
 #define SPECIAL_EXC_LOAD(reg, name) \
        ld      reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1)
 
-special_reg_save:
+SYM_CODE_START_LOCAL(special_reg_save)
        /*
         * We only need (or have stack space) to save this stuff if
         * we interrupted the kernel.
@@ -131,8 +132,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
        SPECIAL_EXC_STORE(r10,CSRR1)
 
        blr
+SYM_CODE_END(special_reg_save)
 
-ret_from_level_except:
+SYM_CODE_START_LOCAL(ret_from_level_except)
        ld      r3,_MSR(r1)
        andi.   r3,r3,MSR_PR
        beq     1f
@@ -206,6 +208,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
        mtxer   r11
 
        blr
+SYM_CODE_END(ret_from_level_except)
 
 .macro ret_from_level srr0 srr1 paca_ex scratch
        bl      ret_from_level_except
@@ -232,13 +235,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
        mfspr   r13,\scratch
 .endm
 
-ret_from_crit_except:
+SYM_CODE_START_LOCAL(ret_from_crit_except)
        ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH
        rfci
+SYM_CODE_END(ret_from_crit_except)
 
-ret_from_mc_except:
+SYM_CODE_START_LOCAL(ret_from_mc_except)
        ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH
        rfmci
+SYM_CODE_END(ret_from_mc_except)
 
 /* Exception prolog code for all exceptions */
 #define EXCEPTION_PROLOG(n, intnum, type, addition)                        \
@@ -978,20 +983,22 @@ masked_interrupt_book3e_0x2c0:
  * r14 and r15 containing the fault address and error code, with the
  * original values stashed away in the PACA
  */
-storage_fault_common:
+SYM_CODE_START_LOCAL(storage_fault_common)
        addi    r3,r1,STACK_INT_FRAME_REGS
        bl      do_page_fault
        b       interrupt_return
+SYM_CODE_END(storage_fault_common)
 
 /*
  * Alignment exception doesn't fit entirely in the 0x100 bytes so it
  * continues here.
  */
-alignment_more:
+SYM_CODE_START_LOCAL(alignment_more)
        addi    r3,r1,STACK_INT_FRAME_REGS
        bl      alignment_exception
        REST_NVGPRS(r1)
        b       interrupt_return
+SYM_CODE_END(alignment_more)
 
 /*
  * Trampolines used when spotting a bad kernel stack pointer in
@@ -1030,8 +1037,7 @@ BAD_STACK_TRAMPOLINE(0xe00)
 BAD_STACK_TRAMPOLINE(0xf00)
 BAD_STACK_TRAMPOLINE(0xf20)
 
-       .globl  bad_stack_book3e
-bad_stack_book3e:
+_GLOBAL(bad_stack_book3e)
        /* XXX: Needs to make SPRN_SPRG_GEN depend on exception type */
        mfspr   r10,SPRN_SRR0;            /* read SRR0 before touching stack */
        ld      r1,PACAEMERGSP(r13)
@@ -1285,8 +1291,7 @@ have_hes:
         * ever takes any parameters, the SCOM code must also be updated to
         * provide them.
         */
-       .globl a2_tlbinit_code_start
-a2_tlbinit_code_start:
+_GLOBAL(a2_tlbinit_code_start)
 
        ori     r11,r3,MAS0_WQ_ALLWAYS
        oris    r11,r11,MAS0_ESEL(3)@h /* Use way 3: workaround A2 erratum 376 */
@@ -1479,8 +1484,7 @@ _GLOBAL(book3e_secondary_thread_init)
        mflr    r28
        b       3b
 
-       .globl init_core_book3e
-init_core_book3e:
+_GLOBAL(init_core_book3e)
        /* Establish the interrupt vector base */
        tovirt(r2,r2)
        LOAD_REG_ADDR(r3, interrupt_base_book3e)
@@ -1488,7 +1492,7 @@ init_core_book3e:
        sync
        blr
 
-init_thread_book3e:
+SYM_CODE_START_LOCAL(init_thread_book3e)
        lis     r3,(SPRN_EPCR_ICM | SPRN_EPCR_GICM)@h
        mtspr   SPRN_EPCR,r3
 
@@ -1502,6 +1506,7 @@ init_thread_book3e:
        mtspr   SPRN_TSR,r3
 
        blr
+SYM_CODE_END(init_thread_book3e)
 
 _GLOBAL(__setup_base_ivors)
        SET_IVOR(0, 0x020) /* Critical Input */
index f132d87..6440b1b 100644 (file)
@@ -375,8 +375,7 @@ _GLOBAL(generic_secondary_smp_init)
        beq     20f
 
        /* start the specified thread */
-       LOAD_REG_ADDR(r5, fsl_secondary_thread_init)
-       ld      r4, 0(r5)
+       LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init))
        bl      book3e_start_thread
 
        /* stop the current thread */
index 4caf5e3..359577e 100644 (file)
@@ -709,9 +709,9 @@ static int __init rtas_flash_init(void)
        if (!rtas_validate_flash_data.buf)
                return -ENOMEM;
 
-       flash_block_cache = kmem_cache_create("rtas_flash_cache",
-                                             RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
-                                             NULL);
+       flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+                                                      RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+                                                      0, 0, RTAS_BLK_SIZE, NULL);
        if (!flash_block_cache) {
                printk(KERN_ERR "%s: failed to create block cache\n",
                                __func__);
index 206475e..4856e1a 100644 (file)
@@ -364,26 +364,27 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *
 
 static int ssb_prctl_get(struct task_struct *task)
 {
+       /*
+        * The STF_BARRIER feature is on by default, so if it's off that means
+        * firmware has explicitly said the CPU is not vulnerable via either
+        * the hypercall or device tree.
+        */
+       if (!security_ftr_enabled(SEC_FTR_STF_BARRIER))
+               return PR_SPEC_NOT_AFFECTED;
+
+       /*
+        * If the system's CPU has no known barrier (see setup_stf_barrier())
+        * then assume that the CPU is not vulnerable.
+        */
        if (stf_enabled_flush_types == STF_BARRIER_NONE)
-               /*
-                * We don't have an explicit signal from firmware that we're
-                * vulnerable or not, we only have certain CPU revisions that
-                * are known to be vulnerable.
-                *
-                * We assume that if we're on another CPU, where the barrier is
-                * NONE, then we are not vulnerable.
-                */
                return PR_SPEC_NOT_AFFECTED;
-       else
-               /*
-                * If we do have a barrier type then we are vulnerable. The
-                * barrier is not a global or per-process mitigation, so the
-                * only value we can report here is PR_SPEC_ENABLE, which
-                * appears as "vulnerable" in /proc.
-                */
-               return PR_SPEC_ENABLE;
-
-       return -EINVAL;
+
+       /*
+        * Otherwise the CPU is vulnerable. The barrier is not a global or
+        * per-process mitigation, so the only value that can be reported here
+        * is PR_SPEC_ENABLE, which appears as "vulnerable" in /proc.
+        */
+       return PR_SPEC_ENABLE;
 }
 
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
index 8c0b08b..20e5058 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    nospu   set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index ffb1db3..1f7d86d 100644 (file)
@@ -33,6 +33,9 @@
  * and then arrange for the ftrace function to be called.
  */
 .macro ftrace_regs_entry allregs
+       /* Create a minimal stack frame for representing B */
+       PPC_STLU        r1, -STACK_FRAME_MIN_SIZE(r1)
+
        /* Create our stack frame + pt_regs */
        PPC_STLU        r1,-SWITCH_FRAME_SIZE(r1)
 
@@ -42,7 +45,7 @@
 
 #ifdef CONFIG_PPC64
        /* Save the original return address in A's stack frame */
-       std     r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
+       std     r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
        /* Ok to continue? */
        lbz     r3, PACA_FTRACE_ENABLED(r13)
        cmpdi   r3, 0
@@ -77,6 +80,8 @@
        mflr    r7
        /* Save it as pt_regs->nip */
        PPC_STL r7, _NIP(r1)
+       /* Also save it in B's stackframe header for proper unwind */
+       PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
        /* Save the read LR in pt_regs->link */
        PPC_STL r0, _LINK(r1)
 
 #endif
 
        /* Pop our stack frame */
-       addi r1, r1, SWITCH_FRAME_SIZE
+       addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
 
 #ifdef CONFIG_LIVEPATCH_64
         /* Based on the cmpd above, if the NIP was altered handle livepatch */
index e59ec6d..7ef147e 100644 (file)
@@ -1508,13 +1508,8 @@ static void do_program_check(struct pt_regs *regs)
 
                if (!(regs->msr & MSR_PR) &&  /* not user-mode */
                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
-                       const struct exception_table_entry *entry;
-
-                       entry = search_exception_tables(bugaddr);
-                       if (entry) {
-                               regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr);
-                               return;
-                       }
+                       regs_add_return_ip(regs, 4);
+                       return;
                }
 
                if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) {
index 9342e79..430d1d9 100644 (file)
@@ -328,10 +328,12 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
 
 static long native_hpte_remove(unsigned long hpte_group)
 {
+       unsigned long hpte_v, flags;
        struct hash_pte *hptep;
        int i;
        int slot_offset;
-       unsigned long hpte_v;
+
+       local_irq_save(flags);
 
        DBG_LOW("    remove(group=%lx)\n", hpte_group);
 
@@ -356,13 +358,16 @@ static long native_hpte_remove(unsigned long hpte_group)
                slot_offset &= 0x7;
        }
 
-       if (i == HPTES_PER_GROUP)
-               return -1;
+       if (i == HPTES_PER_GROUP) {
+               i = -1;
+               goto out;
+       }
 
        /* Invalidate the hpte. NOTE: this also unlocks it */
        release_hpte_lock();
        hptep->v = 0;
-
+out:
+       local_irq_restore(flags);
        return i;
 }
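
native_hpte_remove() scans a slot group while taking per-HPTE locks, so an interrupt on the same CPU that also touches the hash table could deadlock against it; the fix disables local interrupts for the whole scan and funnels the failure path through a single exit so the restore is never skipped. The shape, with demo_try_claim_slot() as an assumed helper:

    #include <linux/irqflags.h>

    static bool demo_try_claim_slot(void);  /* assumed helper */

    static long demo_remove(void)
    {
            unsigned long flags;
            long ret = -1;

            local_irq_save(flags);  /* no IRQs while slot locks are held */
            if (!demo_try_claim_slot())
                    goto out;       /* single exit keeps the restore paired */
            ret = 0;
    out:
            local_irq_restore(flags);
            return ret;
    }
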
 
index 0dc8555..ec98e52 100644 (file)
@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops subpage_walk_ops = {
        .pmd_entry      = subpage_walk_pmd_entry,
+       .walk_lock      = PGWALK_WRLOCK_VERIFY,
 };
 
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
index fe1b830..0ec5b45 100644 (file)
@@ -314,8 +314,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
        start = ALIGN_DOWN(start, page_size);
        if (altmap) {
                alt_start = altmap->base_pfn;
-               alt_end = altmap->base_pfn + altmap->reserve +
-                         altmap->free + altmap->alloc + altmap->align;
+               alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
        }
 
        pr_debug("vmemmap_free %lx...%lx\n", start, end);
index 699eeff..f9522fd 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
 
 obj-$(CONFIG_PPC32)            += init_32.o
 obj-$(CONFIG_PPC_8xx)          += 8xx.o
index 1bfb295..c1e9816 100644 (file)
@@ -477,7 +477,7 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
+static void mpc512x_lpbfifo_remove(struct platform_device *pdev)
 {
        unsigned long flags;
        struct dma_device *dma_dev = lpbfifo.chan->device;
@@ -494,8 +494,6 @@ static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
        free_irq(lpbfifo.irq, &pdev->dev);
        irq_dispose_mapping(lpbfifo.irq);
        dma_release_channel(lpbfifo.chan);
-
-       return 0;
 }
 
 static const struct of_device_id mpc512x_lpbfifo_match[] = {
@@ -506,7 +504,7 @@ MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
 
 static struct platform_driver mpc512x_lpbfifo_driver = {
        .probe = mpc512x_lpbfifo_probe,
-       .remove = mpc512x_lpbfifo_remove,
+       .remove_new = mpc512x_lpbfifo_remove,
        .driver = {
                .name = DRV_NAME,
                .of_match_table = mpc512x_lpbfifo_match,
index 9c43cf3..40aa582 100644 (file)
@@ -180,7 +180,7 @@ static void wake_hw_thread(void *info)
        unsigned long inia;
        int cpu = *(const int *)info;
 
-       inia = *(unsigned long *)fsl_secondary_thread_init;
+       inia = ppc_function_entry(fsl_secondary_thread_init);
        book3e_start_thread(cpu_thread_in_core(cpu), inia);
 }
 #endif
index ea807aa..38c5be3 100644 (file)
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
        inode->i_mode = mode;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 out:
        return inode;
 }
index 4c5790a..8633891 100644 (file)
@@ -26,8 +26,8 @@
 #include <linux/rtc.h>
 #include <linux/of_address.h>
 
+#include <asm/early_ioremap.h>
 #include <asm/sections.h>
-#include <asm/io.h>
 #include <asm/machdep.h>
 #include <asm/time.h>
 #include <asm/nvram.h>
@@ -182,7 +182,7 @@ static int __init via_calibrate_decr(void)
                return 0;
        }
        of_node_put(vias);
-       via = ioremap(rsrc.start, resource_size(&rsrc));
+       via = early_ioremap(rsrc.start, resource_size(&rsrc));
        if (via == NULL) {
                printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
                return 0;
@@ -207,7 +207,7 @@ static int __init via_calibrate_decr(void)
 
        ppc_tb_freq = (dstart - dend) * 100 / 6;
 
-       iounmap(via);
+       early_iounmap((void *)via, resource_size(&rsrc));
 
        return 1;
 }
index 9a44a98..3fbc2a6 100644 (file)
@@ -744,6 +744,12 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
                }
 
                task_ref = &win->vas_win.task_ref;
+               /*
+                * VAS mmap (coproc_mmap()) and its fault handler
+                * (vas_mmap_fault()) are called after holding mmap lock.
+                * So hold mmap mutex after mmap_lock to avoid deadlock.
+                */
+               mmap_write_lock(task_ref->mm);
                mutex_lock(&task_ref->mmap_mutex);
                vma = task_ref->vma;
                /*
@@ -752,7 +758,6 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
                 */
                win->vas_win.status |= flag;
 
-               mmap_write_lock(task_ref->mm);
                /*
                 * vma is set in the original mapping. But this mapping
                 * is done with mmap() after the window is opened with ioctl.
@@ -762,8 +767,8 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
                if (vma)
                        zap_vma_pages(vma);
 
-               mmap_write_unlock(task_ref->mm);
                mutex_unlock(&task_ref->mmap_mutex);
+               mmap_write_unlock(task_ref->mm);
                /*
                 * Close VAS window in the hypervisor, but do not
                 * free vas_window struct since it may be reused
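
The deadlock being fixed is a classic ABBA: the fault path already holds mmap_lock when it takes mmap_mutex, so the reconfig path must take the two in the same order. A sketch of the rule; struct demo_ref is an assumed stand-in for the real task_ref:

    #include <linux/mmap_lock.h>
    #include <linux/mutex.h>

    struct demo_ref {
            struct mm_struct *mm;
            struct mutex mmap_mutex;
    };

    static void demo_close_window(struct demo_ref *ref)
    {
            /* Same order as the fault path: mmap_lock, then the mutex. */
            mmap_write_lock(ref->mm);
            mutex_lock(&ref->mmap_mutex);

            /* ... mark the window closed and zap its mapping ... */

            mutex_unlock(&ref->mmap_mutex);
            mmap_write_unlock(ref->mm);
    }
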
index 4c07b91..bea7b73 100644 (file)
@@ -570,24 +570,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE
 config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
        def_bool y
        # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
-       depends on AS_IS_GNU && AS_VERSION >= 23800
-       help
-         Newer binutils versions default to ISA spec version 20191213 which
-         moves some instructions from the I extension to the Zicsr and Zifencei
-         extensions.
+       # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd
+       depends on AS_IS_GNU && AS_VERSION >= 23600
+       help
+         Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer
+         20191213 version, which moves some instructions from the I extension to
+         the Zicsr and Zifencei extensions. This requires explicitly specifying
+         Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr
+         and Zifencei are supported in binutils from version 2.36 onwards.
+         To make life easier, and avoid forcing toolchains that default to a
+         newer ISA spec to version 2.2, relax the check to binutils >= 2.36.
+         For clang < 17 or GCC < 11.3.0, for which this is not possible or need
+         special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC.
 
 config TOOLCHAIN_NEEDS_OLD_ISA_SPEC
        def_bool y
        depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
        # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16
-       depends on CC_IS_CLANG && CLANG_VERSION < 170000
-       help
-         Certain versions of clang do not support zicsr and zifencei via -march
-         but newer versions of binutils require it for the reasons noted in the
-         help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This
-         option causes an older ISA spec compatible with these older versions
-         of clang to be passed to GAS, which has the same result as passing zicsr
-         and zifencei to -march.
+       # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671
+       depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300)
+       help
+         Certain versions of clang and GCC do not support zicsr and zifencei via
+         -march. This option causes an older ISA spec compatible with these older
+         versions of clang and GCC to be passed to GAS, which has the same result
+         as passing zicsr and zifencei to -march.
 
 config FPU
        bool "FPU support"
index 0a01074..ab86ec3 100644 (file)
@@ -192,7 +192,7 @@ CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
 CONFIG_BTRFS_FS=m
 CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_OVERLAY_FS=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
index 38760e4..89b601e 100644 (file)
@@ -98,7 +98,7 @@ CONFIG_RPMSG_CTRL=y
 CONFIG_RPMSG_VIRTIO=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_TMPFS=y
index f71ce21..d5604d2 100644 (file)
@@ -19,7 +19,7 @@ typedef u64 phys_cpuid_t;
 #define PHYS_CPUID_INVALID INVALID_HARTID
 
 /* ACPI table mapping after acpi_permanent_mmap is set */
-void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
 #define acpi_os_ioremap acpi_os_ioremap
 
 #define acpi_strict 1  /* No out-of-spec workarounds on RISC-V */
index 8091b8b..b93ffdd 100644 (file)
@@ -37,6 +37,10 @@ static inline void flush_dcache_page(struct page *page)
 #define flush_icache_user_page(vma, pg, addr, len) \
        flush_icache_mm(vma->vm_mm, 0)
 
+#ifdef CONFIG_64BIT
+#define flush_cache_vmap(start, end)   flush_tlb_kernel_range(start, end)
+#endif
+
 #ifndef CONFIG_SMP
 
 #define flush_icache_all() local_flush_icache_all()
index 29e9a0d..8a6a128 100644 (file)
@@ -21,12 +21,6 @@ extern void efi_init(void);
 int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md);
 int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, bool);
 
-#define arch_efi_call_virt_setup()      ({             \
-               sync_kernel_mappings(efi_mm.pgd);       \
-               efi_virtmap_load();                     \
-       })
-#define arch_efi_call_virt_teardown()   efi_virtmap_unload()
-
 #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE)
 
 /* Load initrd anywhere in system RAM */
@@ -46,8 +40,8 @@ static inline unsigned long efi_get_kimg_min_align(void)
 
 #define EFI_KIMG_PREFERRED_ADDRESS     efi_get_kimg_min_align()
 
-void efi_virtmap_load(void);
-void efi_virtmap_unload(void);
+void arch_efi_call_virt_setup(void);
+void arch_efi_call_virt_teardown(void);
 
 unsigned long stext_offset(void);
 
index 4e1505c..fce0040 100644 (file)
 #define RVC_INSN_FUNCT4_OPOFF  12
 #define RVC_INSN_FUNCT3_MASK   GENMASK(15, 13)
 #define RVC_INSN_FUNCT3_OPOFF  13
+#define RVC_INSN_J_RS1_MASK    GENMASK(11, 7)
 #define RVC_INSN_J_RS2_MASK    GENMASK(6, 2)
 #define RVC_INSN_OPCODE_MASK   GENMASK(1, 0)
 #define RVC_ENCODE_FUNCT3(f_)  (RVC_FUNCT3_##f_ << RVC_INSN_FUNCT3_OPOFF)
@@ -245,8 +246,6 @@ __RISCV_INSN_FUNCS(c_jal, RVC_MASK_C_JAL, RVC_MATCH_C_JAL)
 __RISCV_INSN_FUNCS(auipc, RVG_MASK_AUIPC, RVG_MATCH_AUIPC)
 __RISCV_INSN_FUNCS(jalr, RVG_MASK_JALR, RVG_MATCH_JALR)
 __RISCV_INSN_FUNCS(jal, RVG_MASK_JAL, RVG_MATCH_JAL)
-__RISCV_INSN_FUNCS(c_jr, RVC_MASK_C_JR, RVC_MATCH_C_JR)
-__RISCV_INSN_FUNCS(c_jalr, RVC_MASK_C_JALR, RVC_MATCH_C_JALR)
 __RISCV_INSN_FUNCS(c_j, RVC_MASK_C_J, RVC_MATCH_C_J)
 __RISCV_INSN_FUNCS(beq, RVG_MASK_BEQ, RVG_MATCH_BEQ)
 __RISCV_INSN_FUNCS(bne, RVG_MASK_BNE, RVG_MATCH_BNE)
@@ -273,6 +272,18 @@ static __always_inline bool riscv_insn_is_branch(u32 code)
        return (code & RV_INSN_OPCODE_MASK) == RVG_OPCODE_BRANCH;
 }
 
+static __always_inline bool riscv_insn_is_c_jr(u32 code)
+{
+       return (code & RVC_MASK_C_JR) == RVC_MATCH_C_JR &&
+              (code & RVC_INSN_J_RS1_MASK) != 0;
+}
+
+static __always_inline bool riscv_insn_is_c_jalr(u32 code)
+{
+       return (code & RVC_MASK_C_JALR) == RVC_MATCH_C_JALR &&
+              (code & RVC_INSN_J_RS1_MASK) != 0;
+}
+
 #define RV_IMM_SIGN(x) (-(((x) >> 31) & 1))
 #define RVC_IMM_SIGN(x) (-(((x) >> 12) & 1))
 #define RV_X(X, s, mask)  (((X) >> (s)) & (mask))
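
A plain mask/match test misclassifies the rs1 == 0 code points: C.JR with rs1 == 0 is a reserved encoding and C.JALR with rs1 == 0 is actually C.EBREAK, so the open-coded helpers fold the rs1 check in. A hypothetical caller, with kernel types assumed from linux/types.h:

    /* Classify a compressed instruction as an indirect jump; relies on
     * the rs1 != 0 check inside the helpers above. */
    static bool demo_is_compressed_indirect_jump(u32 insn)
    {
            return riscv_insn_is_c_jr(insn) || riscv_insn_is_c_jalr(insn);
    }
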
index aff6c33..4c58ee7 100644 (file)
@@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
  * Relaxed I/O memory access primitives. These follow the Device memory
  * ordering rules but do not guarantee any ordering relative to Normal memory
  * accesses.  These are defined to order the indicated access (either a read or
- * write) with all other I/O memory accesses. Since the platform specification
- * defines that all I/O regions are strongly ordered on channel 2, no explicit
- * fences are required to enforce this ordering.
+ * write) with all other I/O memory accesses to the same peripheral. Since the
+ * platform specification defines that all I/O regions are strongly ordered on
+ * channel 0, no explicit fences are required to enforce this ordering.
  */
 /* FIXME: These are now the same as asm-generic */
 #define __io_rbr()             do {} while (0)
@@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
 #endif
 
 /*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access.  The memory barriers here are necessary as RISC-V
+ * I/O memory access primitives.  Reads are ordered relative to any following
+ * Normal memory read and delay() loop.  Writes are ordered relative to any
+ * prior Normal memory write.  The memory barriers here are necessary as RISC-V
  * doesn't define any ordering between the memory space and the I/O space.
  */
 #define __io_br()      do {} while (0)
-#define __io_ar(v)     __asm__ __volatile__ ("fence i,r" : : : "memory")
-#define __io_bw()      __asm__ __volatile__ ("fence w,o" : : : "memory")
+#define __io_ar(v)     ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
+#define __io_bw()      ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
 #define __io_aw()      mmiowb_set_pending()
 
 #define readb(c)       ({ u8  __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
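
The reworked fences pin down what the accessors order against: a read completes before any subsequent normal memory read or delay() loop ("fence i,ir"), and a write is ordered after prior normal memory writes ("fence w,o"). Roughly what readb() expands to after this change, as a kernel-context sketch rather than the literal macro expansion:

    static inline u8 demo_readb(const volatile void __iomem *addr)
    {
            u8 v = *(const volatile u8 __force *)addr;          /* readb_cpu() */
            __asm__ __volatile__("fence i,ir" : : : "memory");  /* __io_ar() */
            return v;
    }
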
index 75970ee..b5680c9 100644 (file)
@@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata;
 #define PAGE_KERNEL_IO         __pgprot(_PAGE_IOREMAP)
 
 extern pgd_t swapper_pg_dir[];
+extern pgd_t trampoline_pg_dir[];
+extern pgd_t early_pg_dir[];
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int pmd_present(pmd_t pmd)
index 3d78930..c5ee07b 100644 (file)
@@ -70,8 +70,9 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
                "csrr   %1, " __stringify(CSR_VTYPE) "\n\t"
                "csrr   %2, " __stringify(CSR_VL) "\n\t"
                "csrr   %3, " __stringify(CSR_VCSR) "\n\t"
+               "csrr   %4, " __stringify(CSR_VLENB) "\n\t"
                : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
-                 "=r" (dest->vcsr) : :);
+                 "=r" (dest->vcsr), "=r" (dest->vlenb) : :);
 }
 
 static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
index 58d3e44..924d01b 100644 (file)
@@ -3,12 +3,14 @@
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 
+extern bool pgtable_l4_enabled, pgtable_l5_enabled;
+
 #define IOREMAP_MAX_ORDER (PUD_SHIFT)
 
 #define arch_vmap_pud_supported arch_vmap_pud_supported
 static inline bool arch_vmap_pud_supported(pgprot_t prot)
 {
-       return true;
+       return pgtable_l4_enabled || pgtable_l5_enabled;
 }
 
 #define arch_vmap_pmd_supported arch_vmap_pmd_supported
diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..7d0b32e
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
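
Deriving __BITS_PER_LONG from the compiler-provided __SIZEOF_POINTER__ lets a single uapi header serve rv32, rv64 and the compat vDSO instead of hardcoding a width. A userspace check, assuming only that the compiler predefines __SIZEOF_POINTER__ (GCC and clang both do):

    #include <stdio.h>

    #define DEMO_BITS_PER_LONG (__SIZEOF_POINTER__ * 8)

    int main(void)
    {
            /* prints 32 for an ilp32 build, 64 for an lp64 build */
            printf("%d\n", DEMO_BITS_PER_LONG);
            return 0;
    }
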
index e17c550..2838001 100644 (file)
@@ -97,6 +97,7 @@ struct __riscv_v_ext_state {
        unsigned long vl;
        unsigned long vtype;
        unsigned long vcsr;
+       unsigned long vlenb;
        void *datap;
        /*
         * In signal handler, datap will be set a correct user stack offset
index 5ee03eb..56cb2c9 100644 (file)
@@ -215,9 +215,9 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
        early_iounmap(map, size);
 }
 
-void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
 {
-       return memremap(phys, size, MEMREMAP_WB);
+       return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
 }
 
 #ifdef CONFIG_PCI
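
The cast exists because memremap() returns a plain void * while ACPI expects an __iomem-annotated pointer. Under sparse the annotation is a real, distinct address space; in normal builds it compiles away. A simplified sketch of the mechanics (adapted, not copied, from include/linux/compiler_types.h):

    #ifdef __CHECKER__
    # define __iomem __attribute__((noderef, address_space(__iomem)))
    #else
    # define __iomem
    #endif

    /* Mixing address spaces without an explicit cast draws a sparse
     * warning:
     *
     *   void __iomem *mmio = plain_ptr;                  // warned
     *   void __iomem *mmio = (void __iomem *)plain_ptr;  // accepted
     */
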
index 1893457..b86e5e2 100644 (file)
@@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache
 COMPAT_CC := $(CC)
 COMPAT_LD := $(LD)
 
-COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+# binutils 2.35 does not support the zifencei extension, but in the ISA
+# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI.
+ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
+       COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+else
+       COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32
+endif
 COMPAT_LD_FLAGS := -melf32lriscv
 
 # Disable attributes, as they're useless and break the build.
index a2fc952..35b854c 100644 (file)
 #include <asm/smp.h>
 #include <asm/pgtable.h>
 
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+       return phys_id == cpuid_to_hartid_map(cpu);
+}
+
 /*
  * Returns the hart ID of the given device tree node, or -ENODEV if the node
  * isn't an enabled and valid RISC-V hart node.
index bdcf460..a8f66c0 100644 (file)
@@ -318,18 +318,13 @@ void __init riscv_fill_hwcap(void)
                }
 
                /*
-                * Linux requires the following extensions, so we may as well
-                * always set them.
-                */
-               set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa);
-               set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa);
-
-               /*
                 * These were part of the base ISA when the port & dt-bindings
                 * were upstreamed, and so can be set unconditionally where `i`
                 * is in riscv,isa on DT systems.
                 */
                if (acpi_disabled) {
+                       set_bit(RISCV_ISA_EXT_ZICSR, isainfo->isa);
+                       set_bit(RISCV_ISA_EXT_ZIFENCEI, isainfo->isa);
                        set_bit(RISCV_ISA_EXT_ZICNTR, isainfo->isa);
                        set_bit(RISCV_ISA_EXT_ZIHPM, isainfo->isa);
                }
index b351a3c..55f1d78 100644 (file)
@@ -18,4 +18,6 @@ void arch_crash_save_vmcoreinfo(void)
        vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
 #endif
        vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
+       vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
+                                               kernel_map.va_kernel_pa_offset);
 }
index 5372b70..c08bb5c 100644 (file)
@@ -281,7 +281,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
                kbuf.buffer = initrd;
                kbuf.bufsz = kbuf.memsz = initrd_len;
                kbuf.buf_align = PAGE_SIZE;
-               kbuf.top_down = false;
+               kbuf.top_down = true;
                kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
                ret = kexec_add_buffer(&kbuf);
                if (ret)
@@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
                 * sym, instead of searching the whole relsec.
                 */
                case R_RISCV_PCREL_HI20:
+               case R_RISCV_CALL_PLT:
                case R_RISCV_CALL:
                        *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
                                 ENCODE_UJTYPE_IMM(val - addr);
index d0577cc..a8efa05 100644 (file)
@@ -84,6 +84,9 @@ void do_softirq_own_stack(void)
                : [sp] "r" (sp)
                : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
                  "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+#ifndef CONFIG_FRAME_POINTER
+                 "s0",
+#endif
                  "memory");
        } else
 #endif
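
Without frame pointers, s0 is just another callee-saved register in which the compiler may keep a live value across the asm statement, so it must be declared clobbered; with CONFIG_FRAME_POINTER, s0 is the frame pointer and cannot legally appear in a clobber list at all. The general rule, as an illustrative sketch:

    /* Any register the asm body scratches without declaring it as an
     * output must appear in the clobber list, or the compiler is free
     * to cache a live value there across the statement. */
    static inline void scratch_t0(void)
    {
    #ifdef __riscv
            __asm__ __volatile__("li t0, 0" : : : "t0", "memory");
    #endif
    }
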
index 1d572cf..487303e 100644 (file)
@@ -25,9 +25,6 @@ enum riscv_regset {
 #ifdef CONFIG_FPU
        REGSET_F,
 #endif
-#ifdef CONFIG_RISCV_ISA_V
-       REGSET_V,
-#endif
 };
 
 static int riscv_gpr_get(struct task_struct *target,
@@ -84,61 +81,6 @@ static int riscv_fpr_set(struct task_struct *target,
 }
 #endif
 
-#ifdef CONFIG_RISCV_ISA_V
-static int riscv_vr_get(struct task_struct *target,
-                       const struct user_regset *regset,
-                       struct membuf to)
-{
-       struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
-       if (!riscv_v_vstate_query(task_pt_regs(target)))
-               return -EINVAL;
-
-       /*
-        * Ensure the vector registers have been saved to the memory before
-        * copying them to membuf.
-        */
-       if (target == current)
-               riscv_v_vstate_save(current, task_pt_regs(current));
-
-       /* Copy vector header from vstate. */
-       membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap));
-       membuf_zero(&to, sizeof(vstate->datap));
-
-       /* Copy all the vector registers from vstate. */
-       return membuf_write(&to, vstate->datap, riscv_v_vsize);
-}
-
-static int riscv_vr_set(struct task_struct *target,
-                       const struct user_regset *regset,
-                       unsigned int pos, unsigned int count,
-                       const void *kbuf, const void __user *ubuf)
-{
-       int ret, size;
-       struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
-       if (!riscv_v_vstate_query(task_pt_regs(target)))
-               return -EINVAL;
-
-       /* Copy rest of the vstate except datap */
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0,
-                                offsetof(struct __riscv_v_ext_state, datap));
-       if (unlikely(ret))
-               return ret;
-
-       /* Skip copy datap. */
-       size = sizeof(vstate->datap);
-       count -= size;
-       ubuf += size;
-
-       /* Copy all the vector registers. */
-       pos = 0;
-       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap,
-                                0, riscv_v_vsize);
-       return ret;
-}
-#endif
-
 static const struct user_regset riscv_user_regset[] = {
        [REGSET_X] = {
                .core_note_type = NT_PRSTATUS,
@@ -158,17 +100,6 @@ static const struct user_regset riscv_user_regset[] = {
                .set = riscv_fpr_set,
        },
 #endif
-#ifdef CONFIG_RISCV_ISA_V
-       [REGSET_V] = {
-               .core_note_type = NT_RISCV_VECTOR,
-               .align = 16,
-               .n = ((32 * RISCV_MAX_VLENB) +
-                     sizeof(struct __riscv_v_ext_state)) / sizeof(__u32),
-               .size = sizeof(__u32),
-               .regset_get = riscv_vr_get,
-               .set = riscv_vr_set,
-       },
-#endif
 };
 
 static const struct user_regset_view riscv_user_native_view = {
index 85bbce0..40420af 100644 (file)
@@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
        return -ENOENT;
 }
 
-bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
-{
-       return phys_id == cpuid_to_hartid_map(cpu);
-}
-
 static void ipi_stop(void)
 {
        set_cpu_online(smp_processor_id(), false);
index f910dfc..f798c85 100644 (file)
@@ -297,7 +297,7 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
 asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
 {
        if (user_mode(regs)) {
-               ulong syscall = regs->a7;
+               long syscall = regs->a7;
 
                regs->epc += 4;
                regs->orig_a0 = regs->a0;
@@ -306,9 +306,9 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
 
                syscall = syscall_enter_from_user_mode(regs, syscall);
 
-               if (syscall < NR_syscalls)
+               if (syscall >= 0 && syscall < NR_syscalls)
                        syscall_handler(regs, syscall);
-               else
+               else if (syscall != -1)
                        regs->a0 = -ENOSYS;
 
                syscall_exit_to_user_mode(regs);
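
Treating the syscall number as signed matters because syscall_enter_from_user_mode() can return -1 to mean "a tracer or seccomp hook already handled this, skip it". As an unsigned long that sentinel became a huge value, failed the < NR_syscalls test, and the old else branch overwrote the return value the hook had set. A hypothetical stand-alone rendering of the fixed dispatch logic (the NR_syscalls value is illustrative):

    #include <errno.h>

    #define NR_syscalls 451         /* illustrative */

    static void dispatch(long syscall, long *a0)
    {
            if (syscall >= 0 && syscall < NR_syscalls)
                    ;               /* syscall_handler(regs, syscall) */
            else if (syscall != -1)
                    *a0 = -ENOSYS;  /* genuinely unknown number */
            /* syscall == -1: leave *a0 alone, the hook set it */
    }
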
@@ -372,6 +372,9 @@ asmlinkage void noinstr do_irq(struct pt_regs *regs)
                : [sp] "r" (sp), [regs] "r" (regs)
                : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
                  "t0", "t1", "t2", "t3", "t4", "t5", "t6",
+#ifndef CONFIG_FRAME_POINTER
+                 "s0",
+#endif
                  "memory");
        } else
 #endif
index ec486e5..09b47eb 100644 (file)
@@ -17,8 +17,11 @@ ENTRY(__asm_copy_from_user)
        li t6, SR_SUM
        csrs CSR_STATUS, t6
 
-       /* Save for return value */
-       mv      t5, a2
+       /*
+        * Save the terminal address, which will be used to compute the number
+        * of bytes not copied in case of a fixup exception.
+        */
+       add     t5, a0, a2
 
        /*
         * Register allocation for code below:
@@ -176,7 +179,7 @@ ENTRY(__asm_copy_from_user)
 10:
        /* Disable access to user memory */
        csrc CSR_STATUS, t6
-       mv a0, t5
+       sub a0, t5, a0
        ret
 ENDPROC(__asm_copy_to_user)
 ENDPROC(__asm_copy_from_user)
@@ -228,7 +231,7 @@ ENTRY(__clear_user)
 11:
        /* Disable access to user memory */
        csrc CSR_STATUS, t6
-       mv a0, a1
+       sub a0, a3, a0
        ret
 ENDPROC(__clear_user)
 EXPORT_SYMBOL(__clear_user)
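
Both fixup paths now report how much work was left rather than the original length: the terminal address (destination plus length) is captured at entry, and on a fault the remainder is simply terminal minus the current destination cursor. The arithmetic, as a sketch (register roles taken from the hunks above):

    /* entry:  end = dst + len        (saved in t5 / a3)
     * fault:  return end - cursor    (bytes effectively not copied)
     */
    static unsigned long bytes_not_copied(const char *cursor, const char *end)
    {
            return (unsigned long)(end - cursor);
    }
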
index 70fb319..e4c35ac 100644 (file)
 #include <linux/kfence.h>
 
 #include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/soc.h>
 #include <asm/io.h>
-#include <asm/ptdump.h>
 #include <asm/numa.h>
+#include <asm/pgtable.h>
+#include <asm/ptdump.h>
+#include <asm/sections.h>
+#include <asm/soc.h>
+#include <asm/tlbflush.h>
 
 #include "../kernel/head.h"
 
@@ -214,8 +215,13 @@ static void __init setup_bootmem(void)
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
        phys_ram_end = memblock_end_of_DRAM();
+
+       /*
+        * Make sure we align the start of the memory on a PMD boundary so that
+        * at worst, we map the linear mapping with PMD mappings.
+        */
        if (!IS_ENABLED(CONFIG_XIP_KERNEL))
-               phys_ram_base = memblock_start_of_DRAM();
+               phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
 
        /*
         * In 64-bit, any use of __va/__pa before this point is wrong as we
@@ -1346,7 +1352,7 @@ static void __init reserve_crashkernel(void)
         */
        crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
                                               search_start,
-                                              min(search_end, (unsigned long) SZ_4G));
+                                              min(search_end, (unsigned long)(SZ_4G - 1)));
        if (crash_base == 0) {
                /* Try again without restricting region to 32-bit addressable memory */
                crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
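
One hazard the SZ_4G - 1 form sidesteps: the 4 GiB boundary itself does not fit in a 32-bit unsigned long, so casting SZ_4G truncates to 0 while SZ_4G - 1 stays representable. A small demonstration (user-space, purely illustrative):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t sz_4g = 0x100000000ULL;        /* SZ_4G */

            /* prints 0x0 and 0xffffffff: only the inclusive bound
             * survives a cast to a 32-bit unsigned long */
            printf("0x%x 0x%x\n", (uint32_t)sz_4g, (uint32_t)(sz_4g - 1));
            return 0;
    }
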
index 8fc0efc..a01bc15 100644 (file)
@@ -22,7 +22,6 @@
  * region is not and then we have to go down to the PUD level.
  */
 
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
 pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
 pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
index ea3d61d..161d0b3 100644 (file)
@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
        .pmd_entry = pageattr_pmd_entry,
        .pte_entry = pageattr_pte_entry,
        .pte_hole = pageattr_pte_hole,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
index bf9802a..2717f54 100644 (file)
@@ -69,7 +69,7 @@ struct rv_jit_context {
        struct bpf_prog *prog;
        u16 *insns;             /* RV insns */
        int ninsns;
-       int body_len;
+       int prologue_len;
        int epilogue_offset;
        int *offset;            /* BPF to RV */
        int nexentries;
@@ -216,8 +216,8 @@ static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx)
        int from, to;
 
        off++; /* BPF branch is from PC+1, RV is from PC */
-       from = (insn > 0) ? ctx->offset[insn - 1] : 0;
-       to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
+       from = (insn > 0) ? ctx->offset[insn - 1] : ctx->prologue_len;
+       to = (insn + off > 0) ? ctx->offset[insn + off - 1] : ctx->prologue_len;
        return ninsns_rvoff(to - from);
 }
 
index 737baf8..7a26a3e 100644 (file)
@@ -44,7 +44,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
        unsigned int prog_size = 0, extable_size = 0;
        bool tmp_blinded = false, extra_pass = false;
        struct bpf_prog *tmp, *orig_prog = prog;
-       int pass = 0, prev_ninsns = 0, prologue_len, i;
+       int pass = 0, prev_ninsns = 0, i;
        struct rv_jit_data *jit_data;
        struct rv_jit_context *ctx;
 
@@ -83,6 +83,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                prog = orig_prog;
                goto out_offset;
        }
+
+       if (build_body(ctx, extra_pass, NULL)) {
+               prog = orig_prog;
+               goto out_offset;
+       }
+
        for (i = 0; i < prog->len; i++) {
                prev_ninsns += 32;
                ctx->offset[i] = prev_ninsns;
@@ -91,12 +97,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
        for (i = 0; i < NR_JIT_ITERATIONS; i++) {
                pass++;
                ctx->ninsns = 0;
+
+               bpf_jit_build_prologue(ctx);
+               ctx->prologue_len = ctx->ninsns;
+
                if (build_body(ctx, extra_pass, ctx->offset)) {
                        prog = orig_prog;
                        goto out_offset;
                }
-               ctx->body_len = ctx->ninsns;
-               bpf_jit_build_prologue(ctx);
+
                ctx->epilogue_offset = ctx->ninsns;
                bpf_jit_build_epilogue(ctx);
 
@@ -162,10 +171,8 @@ skip_init_ctx:
 
        if (!prog->is_func || extra_pass) {
                bpf_jit_binary_lock_ro(jit_data->header);
-               prologue_len = ctx->epilogue_offset - ctx->body_len;
                for (i = 0; i < prog->len; i++)
-                       ctx->offset[i] = ninsns_rvoff(prologue_len +
-                                                     ctx->offset[i]);
+                       ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
                bpf_prog_fill_jited_linfo(prog, ctx->offset);
 out_offset:
                kfree(ctx->offset);
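
With the prologue emitted before the body on every pass, ctx->offset[i] is already measured from the start of the image, which is why rv_offset() now falls back to prologue_len (not 0) for the first instruction and the final fixup loop no longer re-adds a prologue length. The resulting lookup rule, as a sketch:

    /* ctx->offset[i] = index of the first RV insn *after* BPF insn i,
     * counted from the start of the image (prologue included). */
    static int bpf_insn_start(const int *offset, int prologue_len, int i)
    {
            return (i > 0) ? offset[i - 1] : prologue_len;
    }
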
index 76e3622..8e4d74f 100644 (file)
@@ -3,7 +3,7 @@ obj-y                           += kernel/
 obj-y                          += mm/
 obj-$(CONFIG_KVM)              += kvm/
 obj-y                          += crypto/
-obj-$(CONFIG_S390_HYPFS_FS)    += hypfs/
+obj-$(CONFIG_S390_HYPFS)       += hypfs/
 obj-$(CONFIG_APPLDATA_BASE)    += appldata/
 obj-y                          += net/
 obj-$(CONFIG_PCI)              += pci/
index 5b39918..18bf754 100644 (file)
@@ -174,6 +174,7 @@ config S390
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_FUNCTION_ARG_ACCESS_API
        select HAVE_FUNCTION_ERROR_INJECTION
+       select HAVE_FUNCTION_GRAPH_RETVAL
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
        select HAVE_GCC_PLUGINS
@@ -512,6 +513,17 @@ config KEXEC_SIG
          verification for the corresponding kernel image type being
          loaded in order for this to work.
 
+config CERT_STORE
+       bool "Get user certificates via DIAG320"
+       depends on KEYS
+       select CRYPTO_LIB_SHA256
+       help
+         Enable this option if you want to access user-provided secure boot
+         certificates via DIAG 0x320.
+
+         These certificates will be made available via the keyring named
+         'cert_store'.
+
 config KERNEL_NOBP
        def_bool n
        prompt "Enable modified branch prediction for the kernel by default"
@@ -743,9 +755,9 @@ config CRASH_DUMP
          Crash dump kernels are loaded in the main kernel with kexec-tools
          into a specially reserved region and then later executed after
          a crash by kdump/kexec.
-         Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
+         Refer to <file:Documentation/arch/s390/zfcpdump.rst> for more details on this.
          This option also enables s390 zfcpdump.
-         See also <file:Documentation/s390/zfcpdump.rst>
+         See also <file:Documentation/arch/s390/zfcpdump.rst>
 
 endmenu
 
@@ -867,13 +879,24 @@ config APPLDATA_NET_SUM
          This can also be compiled as a module, which will be called
          appldata_net_sum.o.
 
-config S390_HYPFS_FS
+config S390_HYPFS
        def_bool y
+       prompt "s390 hypervisor information"
+       help
+         This provides several binary files at (debugfs)/s390_hypfs/ that
+         expose accounting information in an s390 hypervisor environment.
+
+config S390_HYPFS_FS
+       def_bool n
        prompt "s390 hypervisor file system support"
        select SYS_HYPERVISOR
+       depends on S390_HYPFS
        help
          This is a virtual file system intended to provide accounting
-         information in an s390 hypervisor environment.
+         information in an s390 hypervisor environment. This file system
+         is deprecated and should not be used.
+
+         Say N if you are unsure.
 
 source "arch/s390/kvm/Kconfig"
 
index 5ed2428..a53a36e 100644 (file)
@@ -119,7 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR
 OBJCOPYFLAGS   := -O binary
 
 libs-y         += arch/s390/lib/
-drivers-y      += drivers/s390/
 
 boot           := arch/s390/boot
 syscalls       := arch/s390/kernel/syscalls
index 64bd7ac..b9681cb 100644 (file)
@@ -27,6 +27,7 @@ struct page *__bootdata_preserved(vmemmap);
 unsigned long __bootdata_preserved(vmemmap_size);
 unsigned long __bootdata_preserved(MODULES_VADDR);
 unsigned long __bootdata_preserved(MODULES_END);
+unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 
 u64 __bootdata_preserved(stfle_fac_list[16]);
@@ -176,6 +177,7 @@ static unsigned long setup_kernel_memory_layout(void)
        unsigned long asce_limit;
        unsigned long rte_size;
        unsigned long pages;
+       unsigned long vsize;
        unsigned long vmax;
 
        pages = ident_map_size / PAGE_SIZE;
@@ -183,19 +185,19 @@ static unsigned long setup_kernel_memory_layout(void)
        vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
 
        /* choose kernel address space layout: 4 or 3 levels. */
-       vmemmap_start = round_up(ident_map_size, _REGION3_SIZE);
-       if (IS_ENABLED(CONFIG_KASAN) ||
-           vmalloc_size > _REGION2_SIZE ||
-           vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
-                   _REGION2_SIZE) {
+       vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size +
+               MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE;
+       vsize = size_add(vsize, vmalloc_size);
+       if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) {
                asce_limit = _REGION1_SIZE;
                rte_size = _REGION2_SIZE;
        } else {
                asce_limit = _REGION2_SIZE;
                rte_size = _REGION3_SIZE;
        }
+
        /*
-        * forcing modules and vmalloc area under the ultravisor
+        * Forcing modules and vmalloc area under the ultravisor
         * secure storage limit, so that any vmalloc allocation
         * we do could be used to back secure guest storage.
         */
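
size_add() (from include/linux/overflow.h) makes the vsize accumulation safe against a wrap: on overflow it saturates to SIZE_MAX, so an absurd vmalloc= boot parameter pushes the layout into the conservative 4-level branch instead of producing an undefined sum. A simplified rendering of its semantics:

    #include <stddef.h>
    #include <stdint.h>

    /* Simplified: the kernel version builds on check_add_overflow(). */
    static inline size_t size_add_sketch(size_t a, size_t b)
    {
            size_t sum = a + b;

            return (sum < a) ? SIZE_MAX : sum;      /* saturate on wrap */
    }
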
@@ -204,7 +206,7 @@ static unsigned long setup_kernel_memory_layout(void)
        /* force vmalloc and modules below kasan shadow */
        vmax = min(vmax, KASAN_SHADOW_START);
 #endif
-       __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE);
+       __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE);
        __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
                                   sizeof(struct lowcore));
        MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE);
@@ -220,8 +222,9 @@ static unsigned long setup_kernel_memory_layout(void)
        pages = SECTION_ALIGN_UP(pages);
        /* keep vmemmap_start aligned to a top level region table entry */
        vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size);
-       /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */
        vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
+       /* maximum mappable address as seen by arch_get_mappable_range() */
+       max_mappable = vmemmap_start;
        /* make sure identity map doesn't overlay with vmemmap */
        ident_map_size = min(ident_map_size, vmemmap_start);
        vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
@@ -286,8 +289,9 @@ void startup_kernel(void)
 
        setup_lpp();
        safe_addr = mem_safe_offset();
+
        /*
-        * reserve decompressor memory together with decompression heap, buffer and
+        * Reserve decompressor memory together with decompression heap, buffer and
         * memory which might be occupied by the uncompressed kernel at the
         * default 1MB position (if KASLR is off or has failed).
         */
index aa95cf6..af2fbe4 100644 (file)
@@ -116,7 +116,6 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
-CONFIG_NET_TC_SKB_EXT=y
 CONFIG_SMC=m
 CONFIG_SMC_DIAG=m
 CONFIG_INET=y
@@ -193,6 +192,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_AUDIT=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -379,6 +379,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
 CONFIG_VSOCKETS=m
@@ -395,6 +396,7 @@ CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
@@ -502,7 +504,6 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_GOOGLE is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_WANGXUN is not set
 # CONFIG_NET_VENDOR_LITEX is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
@@ -542,6 +543,7 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VERTEXCOM is not set
 # CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -624,7 +626,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QUOTA_DEBUG=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
@@ -646,7 +648,6 @@ CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_TMPFS_INODE64=y
 CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=m
@@ -690,7 +691,6 @@ CONFIG_HARDENED_USERCOPY=y
 CONFIG_FORTIFY_SOURCE=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_SECURITY_LOCKDOWN_LSM=y
 CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
 CONFIG_SECURITY_LANDLOCK=y
@@ -744,7 +744,6 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -836,6 +835,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300
 # CONFIG_RCU_TRACE is not set
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
 CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
@@ -844,6 +844,7 @@ CONFIG_PREEMPT_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_USER_EVENTS=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_FTRACE_STARTUP_TEST=y
 # CONFIG_EVENT_TRACE_STARTUP_TEST is not set
@@ -866,6 +867,7 @@ CONFIG_FAIL_MAKE_REQUEST=y
 CONFIG_FAIL_IO_TIMEOUT=y
 CONFIG_FAIL_FUTEX=y
 CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_CONFIGFS=y
 CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
 CONFIG_LKDTM=m
 CONFIG_TEST_MIN_HEAP=y
index f041945..3f263b7 100644 (file)
@@ -107,7 +107,6 @@ CONFIG_UNIX=y
 CONFIG_UNIX_DIAG=m
 CONFIG_XFRM_USER=m
 CONFIG_NET_KEY=m
-CONFIG_NET_TC_SKB_EXT=y
 CONFIG_SMC=m
 CONFIG_SMC_DIAG=m
 CONFIG_INET=y
@@ -184,6 +183,7 @@ CONFIG_NFT_REJECT=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NFT_HASH=m
 CONFIG_NFT_FIB_INET=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
 CONFIG_NETFILTER_XT_SET=m
 CONFIG_NETFILTER_XT_TARGET_AUDIT=m
 CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -369,6 +369,7 @@ CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
 CONFIG_DNS_RESOLVER=y
 CONFIG_OPENVSWITCH=m
 CONFIG_VSOCKETS=m
@@ -385,6 +386,7 @@ CONFIG_HOTPLUG_PCI_S390=y
 CONFIG_UEVENT_HELPER=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
 CONFIG_CONNECTOR=y
 CONFIG_ZRAM=y
 CONFIG_BLK_DEV_LOOP=m
@@ -492,7 +494,6 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_GOOGLE is not set
 # CONFIG_NET_VENDOR_HUAWEI is not set
 # CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_WANGXUN is not set
 # CONFIG_NET_VENDOR_LITEX is not set
 # CONFIG_NET_VENDOR_MARVELL is not set
 CONFIG_MLX4_EN=m
@@ -532,6 +533,7 @@ CONFIG_MLX5_CORE_EN=y
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VERTEXCOM is not set
 # CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -609,7 +611,7 @@ CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_VIRTIO_FS=m
@@ -673,7 +675,6 @@ CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_SECURITY_LOCKDOWN_LSM=y
 CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
 CONFIG_SECURITY_LANDLOCK=y
@@ -729,7 +730,6 @@ CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MD5=y
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
 CONFIG_CRYPTO_SM3_GENERIC=m
 CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_WP512=m
@@ -787,12 +787,14 @@ CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
 CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
 CONFIG_SCHED_TRACER=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_USER_EVENTS=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
index 6f68b39..e62fb20 100644 (file)
@@ -53,7 +53,6 @@ CONFIG_ZFCP=y
 # CONFIG_HVC_IUCV is not set
 # CONFIG_HW_RANDOM_S390 is not set
 # CONFIG_HMC_DRV is not set
-# CONFIG_S390_UV_UAPI is not set
 # CONFIG_S390_TAPE is not set
 # CONFIG_VMCP is not set
 # CONFIG_MONWRITER is not set
index d29a9d9..8b541e4 100644 (file)
@@ -35,7 +35,7 @@
  * and padding is also possible, the limits need to be generous.
  */
 #define PAES_MIN_KEYSIZE 16
-#define PAES_MAX_KEYSIZE 320
+#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE
 
 static u8 *ctrblk;
 static DEFINE_MUTEX(ctrblk_lock);
@@ -103,7 +103,7 @@ static inline void _free_kb_keybuf(struct key_blob *kb)
 {
        if (kb->key && kb->key != kb->keybuf
            && kb->keylen > sizeof(kb->keybuf)) {
-               kfree(kb->key);
+               kfree_sensitive(kb->key);
                kb->key = NULL;
        }
 }
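
kfree_sensitive() zeroizes the buffer before freeing it, so protected-key material never lingers in the allocator's free lists; a plain memset() before free can be elided as a dead store. A user-space analogue of the idea:

    #include <stdlib.h>
    #include <string.h>

    static void free_sensitive(void *p, size_t len)
    {
            if (!p)
                    return;
            memset(p, 0, len);
            /* compiler barrier so the wipe cannot be optimized away */
            __asm__ __volatile__("" : : "r"(p) : "memory");
            free(p);
    }
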
index 06f6015..c34854d 100644 (file)
@@ -3,7 +3,12 @@
 # Makefile for the linux hypfs filesystem routines.
 #
 
-obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o
+obj-$(CONFIG_S390_HYPFS)       += hypfs_dbfs.o
+obj-$(CONFIG_S390_HYPFS)       += hypfs_diag.o
+obj-$(CONFIG_S390_HYPFS)       += hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS)       += hypfs_sprp.o
+obj-$(CONFIG_S390_HYPFS)       += hypfs_vm.o
 
-s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o
-s390_hypfs-objs += hypfs_diag0c.o
+obj-$(CONFIG_S390_HYPFS_FS)    += hypfs_diag_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)    += hypfs_vm_fs.o
+obj-$(CONFIG_S390_HYPFS_FS)    += inode.o
index 05f3f9a..65f4036 100644 (file)
@@ -46,6 +46,15 @@ void hypfs_diag0c_exit(void);
 void hypfs_sprp_init(void);
 void hypfs_sprp_exit(void);
 
+int __hypfs_fs_init(void);
+
+static inline int hypfs_fs_init(void)
+{
+       if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+               return __hypfs_fs_init();
+       return 0;
+}
+
 /* debugfs interface */
 struct hypfs_dbfs_file;
 
@@ -69,7 +78,6 @@ struct hypfs_dbfs_file {
        struct dentry           *dentry;
 };
 
-extern void hypfs_dbfs_init(void);
 extern void hypfs_dbfs_exit(void);
 extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df);
 extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df);
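
The static inline wrapper is the usual pattern for optional sub-modules: callers in always-built code invoke hypfs_fs_init() unconditionally, IS_ENABLED() folds to a compile-time 0 or 1, and the out-of-line call is discarded (yet still type-checked) when the option is off. The generic shape, with a hypothetical CONFIG_MY_FEATURE:

    int __feature_init(void);       /* built only when the option is set */

    static inline int feature_init(void)
    {
            /* IS_ENABLED(CONFIG_MY_FEATURE) is 0 or 1 at compile time,
             * so the call below is dropped entirely in disabled builds */
            if (IS_ENABLED(CONFIG_MY_FEATURE))
                    return __feature_init();
            return 0;
    }
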
index f4c7dbf..4024599 100644 (file)
@@ -90,12 +90,33 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df)
        debugfs_remove(df->dentry);
 }
 
-void hypfs_dbfs_init(void)
+static int __init hypfs_dbfs_init(void)
 {
-       dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
-}
+       int rc = -ENODATA;
 
-void hypfs_dbfs_exit(void)
-{
+       dbfs_dir = debugfs_create_dir("s390_hypfs", NULL);
+       if (hypfs_diag_init())
+               goto fail_dbfs_exit;
+       if (hypfs_vm_init())
+               goto fail_hypfs_diag_exit;
+       hypfs_sprp_init();
+       if (hypfs_diag0c_init())
+               goto fail_hypfs_sprp_exit;
+       rc = hypfs_fs_init();
+       if (rc)
+               goto fail_hypfs_diag0c_exit;
+       return 0;
+
+fail_hypfs_diag0c_exit:
+       hypfs_diag0c_exit();
+fail_hypfs_sprp_exit:
+       hypfs_sprp_exit();
+       hypfs_vm_exit();
+fail_hypfs_diag_exit:
+       hypfs_diag_exit();
+       pr_err("Initialization of hypfs failed with rc=%i\n", rc);
+fail_dbfs_exit:
        debugfs_remove(dbfs_dir);
+       return rc;
 }
+device_initcall(hypfs_dbfs_init)
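
Folding the sub-module setup into one device_initcall gives the standard kernel unwind shape: each failure label undoes exactly the steps that succeeded before it, in reverse order. The skeleton, stripped to its bones (step/undo names are placeholders):

    int step_a(void);
    void undo_a(void);
    int step_b(void);

    static int subsys_init(void)
    {
            int rc;

            rc = step_a();
            if (rc)
                    goto fail;
            rc = step_b();
            if (rc)
                    goto fail_undo_a;
            return 0;

    fail_undo_a:
            undo_a();               /* unwind in reverse order */
    fail:
            return rc;
    }
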
index c3be533..279b7bb 100644 (file)
 #include <linux/mm.h>
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
+#include "hypfs_diag.h"
 #include "hypfs.h"
 
-#define TMP_SIZE 64            /* size of temporary buffers */
-
 #define DBFS_D204_HDR_VERSION  0
 
-static char *diag224_cpu_names;                        /* diag 224 name table */
 static enum diag204_sc diag204_store_sc;       /* used subcode for store */
 static enum diag204_format diag204_info_type;  /* used diag 204 data format */
 
 static void *diag204_buf;              /* 4K aligned buffer for diag204 data */
-static void *diag204_buf_vmalloc;      /* vmalloc pointer for diag204 data */
 static int diag204_buf_pages;          /* number of pages for diag204 data */
 
 static struct dentry *dbfs_d204_file;
 
-/*
- * DIAG 204 member access functions.
- *
- * Since we have two different diag 204 data formats for old and new s390
- * machines, we do not access the structs directly, but use getter functions for
- * each struct member instead. This should make the code more readable.
- */
-
-/* Time information block */
-
-static inline int info_blk_hdr__size(enum diag204_format type)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return sizeof(struct diag204_info_blk_hdr);
-       else /* DIAG204_INFO_EXT */
-               return sizeof(struct diag204_x_info_blk_hdr);
-}
-
-static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_info_blk_hdr *)hdr)->npar;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
-}
-
-static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_info_blk_hdr *)hdr)->flags;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
-}
-
-/* Partition header */
-
-static inline int part_hdr__size(enum diag204_format type)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return sizeof(struct diag204_part_hdr);
-       else /* DIAG204_INFO_EXT */
-               return sizeof(struct diag204_x_part_hdr);
-}
-
-static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_part_hdr *)hdr)->cpus;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_part_hdr *)hdr)->rcpus;
-}
-
-static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
-                                      char *name)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
-                      DIAG204_LPAR_NAME_LEN);
-       else /* DIAG204_INFO_EXT */
-               memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
-                      DIAG204_LPAR_NAME_LEN);
-       EBCASC(name, DIAG204_LPAR_NAME_LEN);
-       name[DIAG204_LPAR_NAME_LEN] = 0;
-       strim(name);
-}
-
-/* CPU info block */
-
-static inline int cpu_info__size(enum diag204_format type)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return sizeof(struct diag204_cpu_info);
-       else /* DIAG204_INFO_EXT */
-               return sizeof(struct diag204_x_cpu_info);
-}
-
-static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_cpu_info *)hdr)->ctidx;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_cpu_info *)hdr)->ctidx;
-}
-
-static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+enum diag204_format diag204_get_info_type(void)
 {
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_cpu_info *)hdr)->cpu_addr;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
+       return diag204_info_type;
 }
 
-static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+static void diag204_set_info_type(enum diag204_format type)
 {
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_cpu_info *)hdr)->acc_time;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_cpu_info *)hdr)->acc_time;
-}
-
-static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_cpu_info *)hdr)->lp_time;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_cpu_info *)hdr)->lp_time;
-}
-
-static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return 0;       /* online_time not available in simple info */
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_cpu_info *)hdr)->online_time;
-}
-
-/* Physical header */
-
-static inline int phys_hdr__size(enum diag204_format type)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return sizeof(struct diag204_phys_hdr);
-       else /* DIAG204_INFO_EXT */
-               return sizeof(struct diag204_x_phys_hdr);
-}
-
-static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_phys_hdr *)hdr)->cpus;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_phys_hdr *)hdr)->cpus;
-}
-
-/* Physical CPU info block */
-
-static inline int phys_cpu__size(enum diag204_format type)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return sizeof(struct diag204_phys_cpu);
-       else /* DIAG204_INFO_EXT */
-               return sizeof(struct diag204_x_phys_cpu);
-}
-
-static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
-}
-
-static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_phys_cpu *)hdr)->mgm_time;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
-}
-
-static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
-{
-       if (type == DIAG204_INFO_SIMPLE)
-               return ((struct diag204_phys_cpu *)hdr)->ctidx;
-       else /* DIAG204_INFO_EXT */
-               return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+       diag204_info_type = type;
 }
 
 /* Diagnose 204 functions */
@@ -212,43 +51,11 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
 
 static void diag204_free_buffer(void)
 {
-       if (!diag204_buf)
-               return;
-       if (diag204_buf_vmalloc) {
-               vfree(diag204_buf_vmalloc);
-               diag204_buf_vmalloc = NULL;
-       } else {
-               free_pages((unsigned long) diag204_buf, 0);
-       }
+       vfree(diag204_buf);
        diag204_buf = NULL;
 }
 
-static void *page_align_ptr(void *ptr)
-{
-       return (void *) PAGE_ALIGN((unsigned long) ptr);
-}
-
-static void *diag204_alloc_vbuf(int pages)
-{
-       /* The buffer has to be page aligned! */
-       diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1)));
-       if (!diag204_buf_vmalloc)
-               return ERR_PTR(-ENOMEM);
-       diag204_buf = page_align_ptr(diag204_buf_vmalloc);
-       diag204_buf_pages = pages;
-       return diag204_buf;
-}
-
-static void *diag204_alloc_rbuf(void)
-{
-       diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0);
-       if (!diag204_buf)
-               return ERR_PTR(-ENOMEM);
-       diag204_buf_pages = 1;
-       return diag204_buf;
-}
-
-static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
+void *diag204_get_buffer(enum diag204_format fmt, int *pages)
 {
        if (diag204_buf) {
                *pages = diag204_buf_pages;
@@ -256,15 +63,19 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
        }
        if (fmt == DIAG204_INFO_SIMPLE) {
                *pages = 1;
-               return diag204_alloc_rbuf();
        } else {/* DIAG204_INFO_EXT */
                *pages = diag204((unsigned long)DIAG204_SUBC_RSI |
                                 (unsigned long)DIAG204_INFO_EXT, 0, NULL);
                if (*pages <= 0)
-                       return ERR_PTR(-ENOSYS);
-               else
-                       return diag204_alloc_vbuf(*pages);
+                       return ERR_PTR(-EOPNOTSUPP);
        }
+       diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
+                                    PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+                                    __builtin_return_address(0));
+       if (!diag204_buf)
+               return ERR_PTR(-ENOMEM);
+       diag204_buf_pages = *pages;
+       return diag204_buf;
 }
 
 /*
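
The rewrite drops the old over-allocate-then-round-up dance: __vmalloc_node() takes the required alignment directly (and vmalloc memory is page-aligned anyway), so no extra page or manual pointer math is needed. A user-space analogue of the same simplification:

    #include <stdlib.h>

    /* aligned_alloc() wants size to be a multiple of the alignment,
     * which pages * page_size satisfies by construction. */
    static void *alloc_pages_aligned(size_t pages, size_t page_size)
    {
            return aligned_alloc(page_size, pages * page_size);
    }
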
@@ -291,13 +102,13 @@ static int diag204_probe(void)
                if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
                            (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
                        diag204_store_sc = DIAG204_SUBC_STIB7;
-                       diag204_info_type = DIAG204_INFO_EXT;
+                       diag204_set_info_type(DIAG204_INFO_EXT);
                        goto out;
                }
                if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
                            (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
                        diag204_store_sc = DIAG204_SUBC_STIB6;
-                       diag204_info_type = DIAG204_INFO_EXT;
+                       diag204_set_info_type(DIAG204_INFO_EXT);
                        goto out;
                }
                diag204_free_buffer();
@@ -313,10 +124,10 @@ static int diag204_probe(void)
        if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
                    (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
                diag204_store_sc = DIAG204_SUBC_STIB4;
-               diag204_info_type = DIAG204_INFO_SIMPLE;
+               diag204_set_info_type(DIAG204_INFO_SIMPLE);
                goto out;
        } else {
-               rc = -ENOSYS;
+               rc = -EOPNOTSUPP;
                goto fail_store;
        }
 out:
@@ -327,58 +138,13 @@ fail_alloc:
        return rc;
 }
 
-static int diag204_do_store(void *buf, int pages)
+int diag204_store(void *buf, int pages)
 {
        int rc;
 
-       rc = diag204((unsigned long) diag204_store_sc |
-                    (unsigned long) diag204_info_type, pages, buf);
-       return rc < 0 ? -ENOSYS : 0;
-}
-
-static void *diag204_store(void)
-{
-       void *buf;
-       int pages, rc;
-
-       buf = diag204_get_buffer(diag204_info_type, &pages);
-       if (IS_ERR(buf))
-               goto out;
-       rc = diag204_do_store(buf, pages);
-       if (rc)
-               return ERR_PTR(rc);
-out:
-       return buf;
-}
-
-/* Diagnose 224 functions */
-
-static int diag224_get_name_table(void)
-{
-       /* memory must be below 2GB */
-       diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA);
-       if (!diag224_cpu_names)
-               return -ENOMEM;
-       if (diag224(diag224_cpu_names)) {
-               free_page((unsigned long) diag224_cpu_names);
-               return -EOPNOTSUPP;
-       }
-       EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
-       return 0;
-}
-
-static void diag224_delete_name_table(void)
-{
-       free_page((unsigned long) diag224_cpu_names);
-}
-
-static int diag224_idx2name(int index, char *name)
-{
-       memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
-              DIAG204_CPU_NAME_LEN);
-       name[DIAG204_CPU_NAME_LEN] = 0;
-       strim(name);
-       return 0;
+       rc = diag204((unsigned long)diag204_store_sc |
+                    (unsigned long)diag204_get_info_type(), pages, buf);
+       return rc < 0 ? -EOPNOTSUPP : 0;
 }
 
 struct dbfs_d204_hdr {
@@ -403,8 +169,8 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size)
        base = vzalloc(buf_size);
        if (!base)
                return -ENOMEM;
-       d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr);
-       rc = diag204_do_store(d204->buf, diag204_buf_pages);
+       d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr);
+       rc = diag204_store(d204->buf, diag204_buf_pages);
        if (rc) {
                vfree(base);
                return rc;
@@ -433,176 +199,21 @@ __init int hypfs_diag_init(void)
                return -ENODATA;
        }
 
-       if (diag204_info_type == DIAG204_INFO_EXT)
+       if (diag204_get_info_type() == DIAG204_INFO_EXT)
                hypfs_dbfs_create_file(&dbfs_file_d204);
 
-       if (MACHINE_IS_LPAR) {
-               rc = diag224_get_name_table();
-               if (rc) {
-                       pr_err("The hardware system does not provide all "
-                              "functions required by hypfs\n");
-                       debugfs_remove(dbfs_d204_file);
-                       return rc;
-               }
+       rc = hypfs_diag_fs_init();
+       if (rc) {
+               pr_err("The hardware system does not provide all functions required by hypfs\n");
+               debugfs_remove(dbfs_d204_file);
        }
-       return 0;
+       return rc;
 }
 
 void hypfs_diag_exit(void)
 {
        debugfs_remove(dbfs_d204_file);
-       diag224_delete_name_table();
+       hypfs_diag_fs_exit();
        diag204_free_buffer();
        hypfs_dbfs_remove_file(&dbfs_file_d204);
 }
-
-/*
- * Functions to create the directory structure
- * *******************************************
- */
-
-static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
-{
-       struct dentry *cpu_dir;
-       char buffer[TMP_SIZE];
-       void *rc;
-
-       snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type,
-                                                           cpu_info));
-       cpu_dir = hypfs_mkdir(cpus_dir, buffer);
-       rc = hypfs_create_u64(cpu_dir, "mgmtime",
-                             cpu_info__acc_time(diag204_info_type, cpu_info) -
-                             cpu_info__lp_time(diag204_info_type, cpu_info));
-       if (IS_ERR(rc))
-               return PTR_ERR(rc);
-       rc = hypfs_create_u64(cpu_dir, "cputime",
-                             cpu_info__lp_time(diag204_info_type, cpu_info));
-       if (IS_ERR(rc))
-               return PTR_ERR(rc);
-       if (diag204_info_type == DIAG204_INFO_EXT) {
-               rc = hypfs_create_u64(cpu_dir, "onlinetime",
-                                     cpu_info__online_time(diag204_info_type,
-                                                           cpu_info));
-               if (IS_ERR(rc))
-                       return PTR_ERR(rc);
-       }
-       diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer);
-       rc = hypfs_create_str(cpu_dir, "type", buffer);
-       return PTR_ERR_OR_ZERO(rc);
-}
-
-static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
-{
-       struct dentry *cpus_dir;
-       struct dentry *lpar_dir;
-       char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
-       void *cpu_info;
-       int i;
-
-       part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
-       lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
-       lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
-       if (IS_ERR(lpar_dir))
-               return lpar_dir;
-       cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
-       if (IS_ERR(cpus_dir))
-               return cpus_dir;
-       cpu_info = part_hdr + part_hdr__size(diag204_info_type);
-       for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) {
-               int rc;
-               rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
-               if (rc)
-                       return ERR_PTR(rc);
-               cpu_info += cpu_info__size(diag204_info_type);
-       }
-       return cpu_info;
-}
-
-static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
-{
-       struct dentry *cpu_dir;
-       char buffer[TMP_SIZE];
-       void *rc;
-
-       snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type,
-                                                           cpu_info));
-       cpu_dir = hypfs_mkdir(cpus_dir, buffer);
-       if (IS_ERR(cpu_dir))
-               return PTR_ERR(cpu_dir);
-       rc = hypfs_create_u64(cpu_dir, "mgmtime",
-                             phys_cpu__mgm_time(diag204_info_type, cpu_info));
-       if (IS_ERR(rc))
-               return PTR_ERR(rc);
-       diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer);
-       rc = hypfs_create_str(cpu_dir, "type", buffer);
-       return PTR_ERR_OR_ZERO(rc);
-}
-
-static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
-{
-       int i;
-       void *cpu_info;
-       struct dentry *cpus_dir;
-
-       cpus_dir = hypfs_mkdir(parent_dir, "cpus");
-       if (IS_ERR(cpus_dir))
-               return cpus_dir;
-       cpu_info = phys_hdr + phys_hdr__size(diag204_info_type);
-       for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) {
-               int rc;
-               rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
-               if (rc)
-                       return ERR_PTR(rc);
-               cpu_info += phys_cpu__size(diag204_info_type);
-       }
-       return cpu_info;
-}
-
-int hypfs_diag_create_files(struct dentry *root)
-{
-       struct dentry *systems_dir, *hyp_dir;
-       void *time_hdr, *part_hdr;
-       int i, rc;
-       void *buffer, *ptr;
-
-       buffer = diag204_store();
-       if (IS_ERR(buffer))
-               return PTR_ERR(buffer);
-
-       systems_dir = hypfs_mkdir(root, "systems");
-       if (IS_ERR(systems_dir)) {
-               rc = PTR_ERR(systems_dir);
-               goto err_out;
-       }
-       time_hdr = (struct x_info_blk_hdr *)buffer;
-       part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type);
-       for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) {
-               part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
-               if (IS_ERR(part_hdr)) {
-                       rc = PTR_ERR(part_hdr);
-                       goto err_out;
-               }
-       }
-       if (info_blk_hdr__flags(diag204_info_type, time_hdr) &
-           DIAG204_LPAR_PHYS_FLG) {
-               ptr = hypfs_create_phys_files(root, part_hdr);
-               if (IS_ERR(ptr)) {
-                       rc = PTR_ERR(ptr);
-                       goto err_out;
-               }
-       }
-       hyp_dir = hypfs_mkdir(root, "hyp");
-       if (IS_ERR(hyp_dir)) {
-               rc = PTR_ERR(hyp_dir);
-               goto err_out;
-       }
-       ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
-       if (IS_ERR(ptr)) {
-               rc = PTR_ERR(ptr);
-               goto err_out;
-       }
-       rc = 0;
-
-err_out:
-       return rc;
-}
diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h
new file mode 100644 (file)
index 0000000..7090eff
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_DIAG_H_
+#define _S390_HYPFS_DIAG_H_
+
+#include <asm/diag.h>
+
+enum diag204_format diag204_get_info_type(void);
+void *diag204_get_buffer(enum diag204_format fmt, int *pages);
+int diag204_store(void *buf, int pages);
+
+int __hypfs_diag_fs_init(void);
+void __hypfs_diag_fs_exit(void);
+
+static inline int hypfs_diag_fs_init(void)
+{
+       if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+               return __hypfs_diag_fs_init();
+       return 0;
+}
+
+static inline void hypfs_diag_fs_exit(void)
+{
+       if (IS_ENABLED(CONFIG_S390_HYPFS_FS))
+               __hypfs_diag_fs_exit();
+}
+
+#endif /* _S390_HYPFS_DIAG_H_ */
diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c
new file mode 100644 (file)
index 0000000..00a6d37
--- /dev/null
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. Diag 204 and 224
+ *    implementation.
+ *
+ *    Copyright IBM Corp. 2006, 2008
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#define KMSG_COMPONENT "hypfs"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include "hypfs_diag.h"
+#include "hypfs.h"
+
+#define TMP_SIZE 64            /* size of temporary buffers */
+
+static char *diag224_cpu_names;                        /* diag 224 name table */
+static int diag224_idx2name(int index, char *name);
+
+/*
+ * DIAG 204 member access functions.
+ *
+ * Since we have two different diag 204 data formats for old and new s390
+ * machines, we do not access the structs directly, but use getter functions for
+ * each struct member instead. This should make the code more readable.
+ */
+
+/* Time information block */
+
+static inline int info_blk_hdr__size(enum diag204_format type)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return sizeof(struct diag204_info_blk_hdr);
+       else /* DIAG204_INFO_EXT */
+               return sizeof(struct diag204_x_info_blk_hdr);
+}
+
+static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_info_blk_hdr *)hdr)->npar;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
+}
+
+static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_info_blk_hdr *)hdr)->flags;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
+}
+
+/* Partition header */
+
+static inline int part_hdr__size(enum diag204_format type)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return sizeof(struct diag204_part_hdr);
+       else /* DIAG204_INFO_EXT */
+               return sizeof(struct diag204_x_part_hdr);
+}
+
+static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_part_hdr *)hdr)->cpus;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_part_hdr *)hdr)->rcpus;
+}
+
+static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
+                                      char *name)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+                      DIAG204_LPAR_NAME_LEN);
+       else /* DIAG204_INFO_EXT */
+               memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+                      DIAG204_LPAR_NAME_LEN);
+       EBCASC(name, DIAG204_LPAR_NAME_LEN);
+       name[DIAG204_LPAR_NAME_LEN] = 0;
+       strim(name);
+}
+
+/* CPU info block */
+
+static inline int cpu_info__size(enum diag204_format type)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return sizeof(struct diag204_cpu_info);
+       else /* DIAG204_INFO_EXT */
+               return sizeof(struct diag204_x_cpu_info);
+}
+
+static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_cpu_info *)hdr)->ctidx;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_cpu_info *)hdr)->ctidx;
+}
+
+static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
+}
+
+static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_cpu_info *)hdr)->acc_time;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_cpu_info *)hdr)->acc_time;
+}
+
+static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_cpu_info *)hdr)->lp_time;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_cpu_info *)hdr)->lp_time;
+}
+
+static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return 0;       /* online_time not available in simple info */
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_cpu_info *)hdr)->online_time;
+}
+
+/* Physical header */
+
+static inline int phys_hdr__size(enum diag204_format type)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return sizeof(struct diag204_phys_hdr);
+       else /* DIAG204_INFO_EXT */
+               return sizeof(struct diag204_x_phys_hdr);
+}
+
+static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_phys_hdr *)hdr)->cpus;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_phys_hdr *)hdr)->cpus;
+}
+
+/* Physical CPU info block */
+
+static inline int phys_cpu__size(enum diag204_format type)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return sizeof(struct diag204_phys_cpu);
+       else /* DIAG204_INFO_EXT */
+               return sizeof(struct diag204_x_phys_cpu);
+}
+
+static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
+}
+
+static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
+}
+
+static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
+{
+       if (type == DIAG204_INFO_SIMPLE)
+               return ((struct diag204_phys_cpu *)hdr)->ctidx;
+       else /* DIAG204_INFO_EXT */
+               return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
+}
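+
+/*
+ * The accessors above hide which of the two diag204 data formats a
+ * response buffer uses.  As an illustration (not part of the interface),
+ * summing the LPAR time over one partition's CPU info blocks needs only
+ * the type-dispatched size and field helpers:
+ *
+ *     static __u64 sum_lp_time(enum diag204_format type, void *cpu_info, int n)
+ *     {
+ *             __u64 sum = 0;
+ *             int i;
+ *
+ *             for (i = 0; i < n; i++) {
+ *                     sum += cpu_info__lp_time(type, cpu_info);
+ *                     cpu_info += cpu_info__size(type);
+ *             }
+ *             return sum;
+ *     }
+ */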
+
+/*
+ * Functions to create the directory structure
+ * *******************************************
+ */
+
+static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+       struct dentry *cpu_dir;
+       char buffer[TMP_SIZE];
+       void *rc;
+
+       snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(),
+                                                           cpu_info));
+       cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+       if (IS_ERR(cpu_dir))
+               return PTR_ERR(cpu_dir);
+       rc = hypfs_create_u64(cpu_dir, "mgmtime",
+                             cpu_info__acc_time(diag204_get_info_type(), cpu_info) -
+                             cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+       if (IS_ERR(rc))
+               return PTR_ERR(rc);
+       rc = hypfs_create_u64(cpu_dir, "cputime",
+                             cpu_info__lp_time(diag204_get_info_type(), cpu_info));
+       if (IS_ERR(rc))
+               return PTR_ERR(rc);
+       if (diag204_get_info_type() == DIAG204_INFO_EXT) {
+               rc = hypfs_create_u64(cpu_dir, "onlinetime",
+                                     cpu_info__online_time(diag204_get_info_type(),
+                                                           cpu_info));
+               if (IS_ERR(rc))
+                       return PTR_ERR(rc);
+       }
+       diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer);
+       rc = hypfs_create_str(cpu_dir, "type", buffer);
+       return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
+{
+       struct dentry *cpus_dir;
+       struct dentry *lpar_dir;
+       char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
+       void *cpu_info;
+       int i;
+
+       part_hdr__part_name(diag204_get_info_type(), part_hdr, lpar_name);
+       lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
+       lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
+       if (IS_ERR(lpar_dir))
+               return lpar_dir;
+       cpus_dir = hypfs_mkdir(lpar_dir, "cpus");
+       if (IS_ERR(cpus_dir))
+               return cpus_dir;
+       cpu_info = part_hdr + part_hdr__size(diag204_get_info_type());
+       for (i = 0; i < part_hdr__rcpus(diag204_get_info_type(), part_hdr); i++) {
+               int rc;
+
+               rc = hypfs_create_cpu_files(cpus_dir, cpu_info);
+               if (rc)
+                       return ERR_PTR(rc);
+               cpu_info += cpu_info__size(diag204_get_info_type());
+       }
+       return cpu_info;
+}
+
+static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info)
+{
+       struct dentry *cpu_dir;
+       char buffer[TMP_SIZE];
+       void *rc;
+
+       snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(),
+                                                           cpu_info));
+       cpu_dir = hypfs_mkdir(cpus_dir, buffer);
+       if (IS_ERR(cpu_dir))
+               return PTR_ERR(cpu_dir);
+       rc = hypfs_create_u64(cpu_dir, "mgmtime",
+                             phys_cpu__mgm_time(diag204_get_info_type(), cpu_info));
+       if (IS_ERR(rc))
+               return PTR_ERR(rc);
+       diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer);
+       rc = hypfs_create_str(cpu_dir, "type", buffer);
+       return PTR_ERR_OR_ZERO(rc);
+}
+
+static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr)
+{
+       int i;
+       void *cpu_info;
+       struct dentry *cpus_dir;
+
+       cpus_dir = hypfs_mkdir(parent_dir, "cpus");
+       if (IS_ERR(cpus_dir))
+               return cpus_dir;
+       cpu_info = phys_hdr + phys_hdr__size(diag204_get_info_type());
+       for (i = 0; i < phys_hdr__cpus(diag204_get_info_type(), phys_hdr); i++) {
+               int rc;
+
+               rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info);
+               if (rc)
+                       return ERR_PTR(rc);
+               cpu_info += phys_cpu__size(diag204_get_info_type());
+       }
+       return cpu_info;
+}
+
+int hypfs_diag_create_files(struct dentry *root)
+{
+       struct dentry *systems_dir, *hyp_dir;
+       void *time_hdr, *part_hdr;
+       void *buffer, *ptr;
+       int i, rc, pages;
+
+       buffer = diag204_get_buffer(diag204_get_info_type(), &pages);
+       if (IS_ERR(buffer))
+               return PTR_ERR(buffer);
+       rc = diag204_store(buffer, pages);
+       if (rc)
+               return rc;
+
+       systems_dir = hypfs_mkdir(root, "systems");
+       if (IS_ERR(systems_dir)) {
+               rc = PTR_ERR(systems_dir);
+               goto err_out;
+       }
+       time_hdr = buffer;
+       part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type());
+       for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) {
+               part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr);
+               if (IS_ERR(part_hdr)) {
+                       rc = PTR_ERR(part_hdr);
+                       goto err_out;
+               }
+       }
+       if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) &
+           DIAG204_LPAR_PHYS_FLG) {
+               ptr = hypfs_create_phys_files(root, part_hdr);
+               if (IS_ERR(ptr)) {
+                       rc = PTR_ERR(ptr);
+                       goto err_out;
+               }
+       }
+       hyp_dir = hypfs_mkdir(root, "hyp");
+       if (IS_ERR(hyp_dir)) {
+               rc = PTR_ERR(hyp_dir);
+               goto err_out;
+       }
+       ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor");
+       if (IS_ERR(ptr)) {
+               rc = PTR_ERR(ptr);
+               goto err_out;
+       }
+       rc = 0;
+
+err_out:
+       return rc;
+}
+
+/* Diagnose 224 functions */
+
+static int diag224_idx2name(int index, char *name)
+{
+       memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+              DIAG204_CPU_NAME_LEN);
+       name[DIAG204_CPU_NAME_LEN] = 0;
+       strim(name);
+       return 0;
+}
+
+static int diag224_get_name_table(void)
+{
+       /* memory must be below 2GB */
+       diag224_cpu_names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA);
+       if (!diag224_cpu_names)
+               return -ENOMEM;
+       if (diag224(diag224_cpu_names)) {
+               free_page((unsigned long)diag224_cpu_names);
+               return -EOPNOTSUPP;
+       }
+       EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16);
+       return 0;
+}
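+
+/*
+ * Name table layout, as assumed by the two functions above (16-byte
+ * records): byte 0 of record 0 holds the number of additional records
+ * and the CPU type names start at record 1, which is why
+ * diag224_idx2name() reads record "index + 1" and the EBCDIC conversion
+ * starts at offset 16.
+ */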
+
+static void diag224_delete_name_table(void)
+{
+       free_page((unsigned long)diag224_cpu_names);
+}
+
+int __init __hypfs_diag_fs_init(void)
+{
+       if (MACHINE_IS_LPAR)
+               return diag224_get_name_table();
+       return 0;
+}
+
+void __hypfs_diag_fs_exit(void)
+{
+       diag224_delete_name_table();
+}
index a3d881c..3db40ad 100644
 #include <asm/diag.h>
 #include <asm/ebcdic.h>
 #include <asm/timex.h>
+#include "hypfs_vm.h"
 #include "hypfs.h"
 
-#define NAME_LEN 8
 #define DBFS_D2FC_HDR_VERSION 0
 
 static char local_guest[] = "        ";
 static char all_guests[] = "*       ";
 static char *all_groups = all_guests;
-static char *guest_query;
-
-struct diag2fc_data {
-       __u32 version;
-       __u32 flags;
-       __u64 used_cpu;
-       __u64 el_time;
-       __u64 mem_min_kb;
-       __u64 mem_max_kb;
-       __u64 mem_share_kb;
-       __u64 mem_used_kb;
-       __u32 pcpus;
-       __u32 lcpus;
-       __u32 vcpus;
-       __u32 ocpus;
-       __u32 cpu_max;
-       __u32 cpu_shares;
-       __u32 cpu_use_samp;
-       __u32 cpu_delay_samp;
-       __u32 page_wait_samp;
-       __u32 idle_samp;
-       __u32 other_samp;
-       __u32 total_samp;
-       char  guest_name[NAME_LEN];
-};
-
-struct diag2fc_parm_list {
-       char userid[NAME_LEN];
-       char aci_grp[NAME_LEN];
-       __u64 addr;
-       __u32 size;
-       __u32 fmt;
-};
+char *diag2fc_guest_query;
 
 static int diag2fc(int size, char* query, void *addr)
 {
@@ -62,10 +30,10 @@ static int diag2fc(int size, char* query, void *addr)
        unsigned long rc;
        struct diag2fc_parm_list parm_list;
 
-       memcpy(parm_list.userid, query, NAME_LEN);
-       ASCEBC(parm_list.userid, NAME_LEN);
-       memcpy(parm_list.aci_grp, all_groups, NAME_LEN);
-       ASCEBC(parm_list.aci_grp, NAME_LEN);
+       memcpy(parm_list.userid, query, DIAG2FC_NAME_LEN);
+       ASCEBC(parm_list.userid, DIAG2FC_NAME_LEN);
+       memcpy(parm_list.aci_grp, all_groups, DIAG2FC_NAME_LEN);
+       ASCEBC(parm_list.aci_grp, DIAG2FC_NAME_LEN);
        parm_list.addr = (unsigned long)addr;
        parm_list.size = size;
        parm_list.fmt = 0x02;
@@ -87,7 +55,7 @@ static int diag2fc(int size, char* query, void *addr)
 /*
  * Allocate buffer for "query" and store diag 2fc at "offset"
  */
-static void *diag2fc_store(char *query, unsigned int *count, int offset)
+void *diag2fc_store(char *query, unsigned int *count, int offset)
 {
        void *data;
        int size;
@@ -108,132 +76,11 @@ static void *diag2fc_store(char *query, unsigned int *count, int offset)
        return data;
 }
 
-static void diag2fc_free(const void *data)
+void diag2fc_free(const void *data)
 {
        vfree(data);
 }
 
-#define ATTRIBUTE(dir, name, member) \
-do { \
-       void *rc; \
-       rc = hypfs_create_u64(dir, name, member); \
-       if (IS_ERR(rc)) \
-               return PTR_ERR(rc); \
-} while(0)
-
-static int hypfs_vm_create_guest(struct dentry *systems_dir,
-                                struct diag2fc_data *data)
-{
-       char guest_name[NAME_LEN + 1] = {};
-       struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
-       int dedicated_flag, capped_value;
-
-       capped_value = (data->flags & 0x00000006) >> 1;
-       dedicated_flag = (data->flags & 0x00000008) >> 3;
-
-       /* guest dir */
-       memcpy(guest_name, data->guest_name, NAME_LEN);
-       EBCASC(guest_name, NAME_LEN);
-       strim(guest_name);
-       guest_dir = hypfs_mkdir(systems_dir, guest_name);
-       if (IS_ERR(guest_dir))
-               return PTR_ERR(guest_dir);
-       ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
-
-       /* logical cpu information */
-       cpus_dir = hypfs_mkdir(guest_dir, "cpus");
-       if (IS_ERR(cpus_dir))
-               return PTR_ERR(cpus_dir);
-       ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
-       ATTRIBUTE(cpus_dir, "capped", capped_value);
-       ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
-       ATTRIBUTE(cpus_dir, "count", data->vcpus);
-       /*
-        * Note: The "weight_min" attribute got the wrong name.
-        * The value represents the number of non-stopped (operating)
-        * CPUS.
-        */
-       ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
-       ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
-       ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
-
-       /* memory information */
-       mem_dir = hypfs_mkdir(guest_dir, "mem");
-       if (IS_ERR(mem_dir))
-               return PTR_ERR(mem_dir);
-       ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
-       ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
-       ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
-       ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
-
-       /* samples */
-       samples_dir = hypfs_mkdir(guest_dir, "samples");
-       if (IS_ERR(samples_dir))
-               return PTR_ERR(samples_dir);
-       ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
-       ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
-       ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
-       ATTRIBUTE(samples_dir, "idle", data->idle_samp);
-       ATTRIBUTE(samples_dir, "other", data->other_samp);
-       ATTRIBUTE(samples_dir, "total", data->total_samp);
-       return 0;
-}
-
-int hypfs_vm_create_files(struct dentry *root)
-{
-       struct dentry *dir, *file;
-       struct diag2fc_data *data;
-       unsigned int count = 0;
-       int rc, i;
-
-       data = diag2fc_store(guest_query, &count, 0);
-       if (IS_ERR(data))
-               return PTR_ERR(data);
-
-       /* Hypervisor Info */
-       dir = hypfs_mkdir(root, "hyp");
-       if (IS_ERR(dir)) {
-               rc = PTR_ERR(dir);
-               goto failed;
-       }
-       file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
-       if (IS_ERR(file)) {
-               rc = PTR_ERR(file);
-               goto failed;
-       }
-
-       /* physical cpus */
-       dir = hypfs_mkdir(root, "cpus");
-       if (IS_ERR(dir)) {
-               rc = PTR_ERR(dir);
-               goto failed;
-       }
-       file = hypfs_create_u64(dir, "count", data->lcpus);
-       if (IS_ERR(file)) {
-               rc = PTR_ERR(file);
-               goto failed;
-       }
-
-       /* guests */
-       dir = hypfs_mkdir(root, "systems");
-       if (IS_ERR(dir)) {
-               rc = PTR_ERR(dir);
-               goto failed;
-       }
-
-       for (i = 0; i < count; i++) {
-               rc = hypfs_vm_create_guest(dir, &(data[i]));
-               if (rc)
-                       goto failed;
-       }
-       diag2fc_free(data);
-       return 0;
-
-failed:
-       diag2fc_free(data);
-       return rc;
-}
-
 struct dbfs_d2fc_hdr {
        u64     len;            /* Length of d2fc buffer without header */
        u16     version;        /* Version of header */
@@ -252,7 +99,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size)
        struct dbfs_d2fc *d2fc;
        unsigned int count;
 
-       d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr));
+       d2fc = diag2fc_store(diag2fc_guest_query, &count, sizeof(d2fc->hdr));
        if (IS_ERR(d2fc))
                return PTR_ERR(d2fc);
        store_tod_clock_ext(&d2fc->hdr.tod_ext);
@@ -277,9 +124,9 @@ int hypfs_vm_init(void)
        if (!MACHINE_IS_VM)
                return 0;
        if (diag2fc(0, all_guests, NULL) > 0)
-               guest_query = all_guests;
+               diag2fc_guest_query = all_guests;
        else if (diag2fc(0, local_guest, NULL) > 0)
-               guest_query = local_guest;
+               diag2fc_guest_query = local_guest;
        else
                return -EACCES;
        hypfs_dbfs_create_file(&dbfs_file_2fc);
diff --git a/arch/s390/hypfs/hypfs_vm.h b/arch/s390/hypfs/hypfs_vm.h
new file mode 100644
index 0000000..fe2e585
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#ifndef _S390_HYPFS_VM_H_
+#define _S390_HYPFS_VM_H_
+
+#define DIAG2FC_NAME_LEN 8
+
+struct diag2fc_data {
+       __u32 version;
+       __u32 flags;
+       __u64 used_cpu;
+       __u64 el_time;
+       __u64 mem_min_kb;
+       __u64 mem_max_kb;
+       __u64 mem_share_kb;
+       __u64 mem_used_kb;
+       __u32 pcpus;
+       __u32 lcpus;
+       __u32 vcpus;
+       __u32 ocpus;
+       __u32 cpu_max;
+       __u32 cpu_shares;
+       __u32 cpu_use_samp;
+       __u32 cpu_delay_samp;
+       __u32 page_wait_samp;
+       __u32 idle_samp;
+       __u32 other_samp;
+       __u32 total_samp;
+       char  guest_name[DIAG2FC_NAME_LEN];
+};
+
+struct diag2fc_parm_list {
+       char userid[DIAG2FC_NAME_LEN];
+       char aci_grp[DIAG2FC_NAME_LEN];
+       __u64 addr;
+       __u32 size;
+       __u32 fmt;
+};
+
+void *diag2fc_store(char *query, unsigned int *count, int offset);
+void diag2fc_free(const void *data);
+extern char *diag2fc_guest_query;
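+
+/*
+ * Illustrative use of this interface (mirroring hypfs_vm_create_files()
+ * and dbfs_diag2fc_create() in the users of this header):
+ *
+ *     unsigned int count = 0, i;
+ *     struct diag2fc_data *data;
+ *
+ *     data = diag2fc_store(diag2fc_guest_query, &count, 0);
+ *     if (IS_ERR(data))
+ *             return PTR_ERR(data);
+ *     for (i = 0; i < count; i++)
+ *             process_guest(&data[i]);
+ *     diag2fc_free(data);
+ *
+ * where process_guest() stands in for any per-guest consumer.
+ */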
+
+#endif /* _S390_HYPFS_VM_H_ */
diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c
new file mode 100644
index 0000000..6011289
--- /dev/null
+++ b/arch/s390/hypfs/hypfs_vm_fs.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *    Hypervisor filesystem for Linux on s390. z/VM implementation.
+ *
+ *    Copyright IBM Corp. 2006
+ *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/vmalloc.h>
+#include <asm/extable.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/timex.h>
+#include "hypfs_vm.h"
+#include "hypfs.h"
+
+#define ATTRIBUTE(dir, name, member) \
+do { \
+       void *rc; \
+       rc = hypfs_create_u64(dir, name, member); \
+       if (IS_ERR(rc)) \
+               return PTR_ERR(rc); \
+} while (0)
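+
+/*
+ * For example, ATTRIBUTE(cpus_dir, "count", data->vcpus) expands to
+ * roughly:
+ *
+ *     do {
+ *             void *rc;
+ *             rc = hypfs_create_u64(cpus_dir, "count", data->vcpus);
+ *             if (IS_ERR(rc))
+ *                     return PTR_ERR(rc);
+ *     } while (0);
+ *
+ * so any failing attribute creation makes the enclosing function return
+ * the encoded errno immediately.
+ */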
+
+static int hypfs_vm_create_guest(struct dentry *systems_dir,
+                                struct diag2fc_data *data)
+{
+       char guest_name[DIAG2FC_NAME_LEN + 1] = {};
+       struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir;
+       int dedicated_flag, capped_value;
+
+       capped_value = (data->flags & 0x00000006) >> 1;
+       dedicated_flag = (data->flags & 0x00000008) >> 3;
+
+       /* guest dir */
+       memcpy(guest_name, data->guest_name, DIAG2FC_NAME_LEN);
+       EBCASC(guest_name, DIAG2FC_NAME_LEN);
+       strim(guest_name);
+       guest_dir = hypfs_mkdir(systems_dir, guest_name);
+       if (IS_ERR(guest_dir))
+               return PTR_ERR(guest_dir);
+       ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time);
+
+       /* logical cpu information */
+       cpus_dir = hypfs_mkdir(guest_dir, "cpus");
+       if (IS_ERR(cpus_dir))
+               return PTR_ERR(cpus_dir);
+       ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu);
+       ATTRIBUTE(cpus_dir, "capped", capped_value);
+       ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag);
+       ATTRIBUTE(cpus_dir, "count", data->vcpus);
+       /*
+        * Note: The "weight_min" attribute got the wrong name.
+        * The value represents the number of non-stopped (operating)
+        * CPUs.
+        */
+       ATTRIBUTE(cpus_dir, "weight_min", data->ocpus);
+       ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max);
+       ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares);
+
+       /* memory information */
+       mem_dir = hypfs_mkdir(guest_dir, "mem");
+       if (IS_ERR(mem_dir))
+               return PTR_ERR(mem_dir);
+       ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb);
+       ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb);
+       ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb);
+       ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb);
+
+       /* samples */
+       samples_dir = hypfs_mkdir(guest_dir, "samples");
+       if (IS_ERR(samples_dir))
+               return PTR_ERR(samples_dir);
+       ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp);
+       ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp);
+       ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp);
+       ATTRIBUTE(samples_dir, "idle", data->idle_samp);
+       ATTRIBUTE(samples_dir, "other", data->other_samp);
+       ATTRIBUTE(samples_dir, "total", data->total_samp);
+       return 0;
+}
+
+int hypfs_vm_create_files(struct dentry *root)
+{
+       struct dentry *dir, *file;
+       struct diag2fc_data *data;
+       unsigned int count = 0;
+       int rc, i;
+
+       data = diag2fc_store(diag2fc_guest_query, &count, 0);
+       if (IS_ERR(data))
+               return PTR_ERR(data);
+
+       /* Hypervisor Info */
+       dir = hypfs_mkdir(root, "hyp");
+       if (IS_ERR(dir)) {
+               rc = PTR_ERR(dir);
+               goto failed;
+       }
+       file = hypfs_create_str(dir, "type", "z/VM Hypervisor");
+       if (IS_ERR(file)) {
+               rc = PTR_ERR(file);
+               goto failed;
+       }
+
+       /* physical cpus */
+       dir = hypfs_mkdir(root, "cpus");
+       if (IS_ERR(dir)) {
+               rc = PTR_ERR(dir);
+               goto failed;
+       }
+       file = hypfs_create_u64(dir, "count", data->lcpus);
+       if (IS_ERR(file)) {
+               rc = PTR_ERR(file);
+               goto failed;
+       }
+
+       /* guests */
+       dir = hypfs_mkdir(root, "systems");
+       if (IS_ERR(dir)) {
+               rc = PTR_ERR(dir);
+               goto failed;
+       }
+
+       for (i = 0; i < count; i++) {
+               rc = hypfs_vm_create_guest(dir, &data[i]);
+               if (rc)
+                       goto failed;
+       }
+       diag2fc_free(data);
+       return 0;
+
+failed:
+       diag2fc_free(data);
+       return rc;
+}
index ee919bf..ada8314 100644
@@ -53,7 +53,7 @@ static void hypfs_update_update(struct super_block *sb)
        struct inode *inode = d_inode(sb_info->update_file);
 
        sb_info->last_update = ktime_get_seconds();
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 }
 
 /* directory tree removal functions */
@@ -101,7 +101,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
                ret->i_mode = mode;
                ret->i_uid = hypfs_info->uid;
                ret->i_gid = hypfs_info->gid;
-               ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+               ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
                if (S_ISDIR(mode))
                        set_nlink(ret, 2);
        }
@@ -460,45 +460,18 @@ static const struct super_operations hypfs_s_ops = {
        .show_options   = hypfs_show_options,
 };
 
-static int __init hypfs_init(void)
+int __init __hypfs_fs_init(void)
 {
        int rc;
 
-       hypfs_dbfs_init();
-
-       if (hypfs_diag_init()) {
-               rc = -ENODATA;
-               goto fail_dbfs_exit;
-       }
-       if (hypfs_vm_init()) {
-               rc = -ENODATA;
-               goto fail_hypfs_diag_exit;
-       }
-       hypfs_sprp_init();
-       if (hypfs_diag0c_init()) {
-               rc = -ENODATA;
-               goto fail_hypfs_sprp_exit;
-       }
        rc = sysfs_create_mount_point(hypervisor_kobj, "s390");
        if (rc)
-               goto fail_hypfs_diag0c_exit;
+               return rc;
        rc = register_filesystem(&hypfs_type);
        if (rc)
-               goto fail_filesystem;
+               goto fail;
        return 0;
-
-fail_filesystem:
+fail:
        sysfs_remove_mount_point(hypervisor_kobj, "s390");
-fail_hypfs_diag0c_exit:
-       hypfs_diag0c_exit();
-fail_hypfs_sprp_exit:
-       hypfs_sprp_exit();
-       hypfs_vm_exit();
-fail_hypfs_diag_exit:
-       hypfs_diag_exit();
-       pr_err("Initialization of hypfs failed with rc=%i\n", rc);
-fail_dbfs_exit:
-       hypfs_dbfs_exit();
        return rc;
 }
-device_initcall(hypfs_init)
index 1a18d7b..4b90411 100644
@@ -5,6 +5,5 @@ generated-y += syscall_table.h
 generated-y += unistd_nr.h
 
 generic-y += asm-offsets.h
-generic-y += export.h
 generic-y += kvm_types.h
 generic-y += mcs_spinlock.h
index ac665b9..ccd4e14 100644
@@ -222,7 +222,7 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
 
 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
@@ -350,7 +350,7 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
 
 /*
  * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
- * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
+ * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details!
  */
 extern debug_entry_t *
 __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
index 902e033..bed8041 100644
@@ -36,6 +36,7 @@ enum diag_stat_enum {
        DIAG_STAT_X304,
        DIAG_STAT_X308,
        DIAG_STAT_X318,
+       DIAG_STAT_X320,
        DIAG_STAT_X500,
        NR_DIAG_STAT
 };
@@ -108,6 +109,8 @@ enum diag204_sc {
        DIAG204_SUBC_STIB7 = 7
 };
 
+#define DIAG204_SUBCODE_MASK 0xffff
+
 /* The two available diag 204 data formats */
 enum diag204_format {
        DIAG204_INFO_SIMPLE = 0,
index e5c5cb1..5a82b08 100644
@@ -54,6 +54,23 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *
        return NULL;
 }
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+struct fgraph_ret_regs {
+       unsigned long gpr2;
+       unsigned long fp;
+};
+
+static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
+{
+       return ret_regs->gpr2;
+}
+
+static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
+{
+       return ret_regs->fp;
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 static __always_inline unsigned long
 ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
 {
index d55ba87..e47fd8c 100644
@@ -35,7 +35,7 @@ static __always_inline void kfence_split_mapping(void)
 
 static inline bool kfence_protect_page(unsigned long addr, bool protect)
 {
-       __kernel_map_pages(virt_to_page(addr), 1, !protect);
+       __kernel_map_pages(virt_to_page((void *)addr), 1, !protect);
        return true;
 }
 
index 2bbc3d5..91bfecb 100644
@@ -1028,6 +1028,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
 
 extern char sie_exit;
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm);
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu);
+
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
 extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
index cfec314..5022594 100644
@@ -4,6 +4,9 @@
 
 #include <linux/types.h>
 
+#define MEMCPY_REAL_SIZE       PAGE_SIZE
+#define MEMCPY_REAL_MASK       PAGE_MASK
+
 struct iov_iter;
 
 extern unsigned long __memcpy_real_area;
index a9c138f..cfec074 100644
@@ -191,8 +191,16 @@ int arch_make_page_accessible(struct page *page);
 #define phys_to_page(phys)     pfn_to_page(phys_to_pfn(phys))
 #define page_to_phys(page)     pfn_to_phys(page_to_pfn(page))
 
-#define pfn_to_virt(pfn)       __va(pfn_to_phys(pfn))
-#define virt_to_pfn(kaddr)     (phys_to_pfn(__pa(kaddr)))
+static inline void *pfn_to_virt(unsigned long pfn)
+{
+       return __va(pfn_to_phys(pfn));
+}
+
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+       return phys_to_pfn(__pa(kaddr));
+}
+
 #define pfn_to_kaddr(pfn)      pfn_to_virt(pfn)
 
 #define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h
new file mode 100644
index 0000000..a1bee4a
--- /dev/null
+++ b/arch/s390/include/asm/pfault.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *    Copyright IBM Corp. 1999, 2023
+ */
+#ifndef _ASM_S390_PFAULT_H
+#define _ASM_S390_PFAULT_H
+
+#include <linux/errno.h>
+
+int __pfault_init(void);
+void __pfault_fini(void);
+
+static inline int pfault_init(void)
+{
+       if (IS_ENABLED(CONFIG_PFAULT))
+               return __pfault_init();
+       return -EOPNOTSUPP;
+}
+
+static inline void pfault_fini(void)
+{
+       if (IS_ENABLED(CONFIG_PFAULT))
+               __pfault_fini();
+}
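+
+/*
+ * With CONFIG_PFAULT=n the IS_ENABLED() checks above are compile-time
+ * false: the calls to __pfault_init()/__pfault_fini() are discarded as
+ * dead code, pfault_init() collapses to "return -EOPNOTSUPP", and
+ * callers need no #ifdef CONFIG_PFAULT of their own.
+ */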
+
+#endif /* _ASM_S390_PFAULT_H */
index c55f3c3..30909fe 100644
@@ -89,8 +89,6 @@ extern unsigned long __bootdata_preserved(VMALLOC_END);
 extern struct page *__bootdata_preserved(vmemmap);
 extern unsigned long __bootdata_preserved(vmemmap_size);
 
-#define VMEM_MAX_PHYS ((unsigned long) vmemmap)
-
 extern unsigned long __bootdata_preserved(MODULES_VADDR);
 extern unsigned long __bootdata_preserved(MODULES_END);
 #define MODULES_VADDR  MODULES_VADDR
index dac7da8..5742d23 100644
@@ -86,6 +86,7 @@ struct sclp_info {
        unsigned char has_kss : 1;
        unsigned char has_gisaf : 1;
        unsigned char has_diag318 : 1;
+       unsigned char has_diag320 : 1;
        unsigned char has_sipl : 1;
        unsigned char has_sipl_eckd : 1;
        unsigned char has_dirq : 1;
index f191255..b30fe91 100644
@@ -74,6 +74,7 @@ extern unsigned int zlib_dfltcc_support;
 
 extern int noexec_disabled;
 extern unsigned long ident_map_size;
+extern unsigned long max_mappable;
 
 /* The Write Back bit position in the physaddr is given by the SLPC PCI */
 extern unsigned long mio_wb_bit_mask;
@@ -117,14 +118,6 @@ extern unsigned int console_irq;
 #define SET_CONSOLE_VT220      do { console_mode = 4; } while (0)
 #define SET_CONSOLE_HVC                do { console_mode = 5; } while (0)
 
-#ifdef CONFIG_PFAULT
-extern int pfault_init(void);
-extern void pfault_fini(void);
-#else /* CONFIG_PFAULT */
-#define pfault_init()          ({-1;})
-#define pfault_fini()          do { } while (0)
-#endif /* CONFIG_PFAULT */
-
 #ifdef CONFIG_VMCP
 void vmcp_cma_reserve(void);
 #else
index d6bb2f4..d2cd42b 100644
@@ -463,6 +463,7 @@ static inline int is_prot_virt_host(void)
        return prot_virt_host;
 }
 
+int uv_pin_shared(unsigned long paddr);
 int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
 int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
 int uv_destroy_owned_page(unsigned long paddr);
@@ -475,6 +476,11 @@ void setup_uv(void);
 #define is_prot_virt_host() 0
 static inline void setup_uv(void) {}
 
+static inline int uv_pin_shared(unsigned long paddr)
+{
+       return 0;
+}
+
 static inline int uv_destroy_owned_page(unsigned long paddr)
 {
        return 0;
index 5faf0a1..5ad7647 100644
@@ -26,7 +26,7 @@
 #define MAXCLRKEYSIZE  32         /* a clear key value may be up to 32 bytes */
 #define MAXAESCIPHERKEYSIZE 136  /* our aes cipher keys have always 136 bytes */
 #define MINEP11AESKEYBLOBSIZE 256  /* min EP11 AES key blob size  */
-#define MAXEP11AESKEYBLOBSIZE 320  /* max EP11 AES key blob size */
+#define MAXEP11AESKEYBLOBSIZE 336  /* max EP11 AES key blob size */
 
 /* Minimum size of a key blob */
 #define MINKEYBLOBSIZE SECKEYBLOBSIZE
index f0fe3bc..bb08260 100644
@@ -8,6 +8,8 @@
 #ifndef _UAPI_S390_PTRACE_H
 #define _UAPI_S390_PTRACE_H
 
+#include <linux/const.h>
+
 /*
  * Offsets in the user_regs_struct. They are used for the ptrace
  * system call and in entry.S
index 6b2a051..0df2b88 100644
@@ -37,9 +37,9 @@ CFLAGS_unwind_bc.o    += -fno-optimize-sibling-calls
 obj-y  := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o
 obj-y  += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
 obj-y  += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o
-obj-y  += sysinfo.o lgr.o os_info.o machine_kexec.o
+obj-y  += sysinfo.o lgr.o os_info.o
 obj-y  += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
-obj-y  += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
+obj-y  += entry.o reipl.o kdebugfs.o alternative.o
 obj-y  += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
 obj-y  += smp.o text_amode31.o stacktrace.o abs_lowcore.o
 
@@ -63,12 +63,13 @@ obj-$(CONFIG_RETHOOK)               += rethook.o
 obj-$(CONFIG_FUNCTION_TRACER)  += ftrace.o
 obj-$(CONFIG_FUNCTION_TRACER)  += mcount.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
+obj-$(CONFIG_KEXEC_CORE)       += machine_kexec.o relocate_kernel.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
 obj-$(CONFIG_JUMP_LABEL)       += jump_label.o
 
 obj-$(CONFIG_KEXEC_FILE)       += machine_kexec_file.o kexec_image.o
 obj-$(CONFIG_KEXEC_FILE)       += kexec_elf.o
-
+obj-$(CONFIG_CERT_STORE)       += cert_store.o
 obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT)   += ima_arch.o
 
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o
index 81cf720..fa5f688 100644
@@ -12,6 +12,7 @@
 #include <linux/sched.h>
 #include <linux/purgatory.h>
 #include <linux/pgtable.h>
+#include <linux/ftrace.h>
 #include <asm/idle.h>
 #include <asm/gmap.h>
 #include <asm/stacktrace.h>
@@ -177,5 +178,13 @@ int main(void)
        DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size));
        DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line));
        DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size));
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       /* function graph return value tracing */
+       OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2);
+       OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp);
+       DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs));
+#endif
+       OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
+       DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
        return 0;
 }
diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c
new file mode 100644
index 0000000..3986a04
--- /dev/null
+++ b/arch/s390/kernel/cert_store.c
@@ -0,0 +1,811 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DIAG 0x320 support and certificate store handling
+ *
+ * Copyright IBM Corp. 2023
+ * Author(s):  Anastasia Eskova <anastasia.eskova@ibm.com>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/key-type.h>
+#include <linux/key.h>
+#include <linux/keyctl.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+#include <crypto/sha2.h>
+#include <keys/user-type.h>
+#include <asm/debug.h>
+#include <asm/diag.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+
+#define DIAG_MAX_RETRIES               10
+
+#define VCE_FLAGS_VALID_MASK           0x80
+
+#define ISM_LEN_DWORDS                 4
+#define VCSSB_LEN_BYTES                        128
+#define VCSSB_LEN_NO_CERTS             4
+#define VCB_LEN_NO_CERTS               64
+#define VC_NAME_LEN_BYTES              64
+
+#define CERT_STORE_KEY_TYPE_NAME       "cert_store_key"
+#define CERT_STORE_KEYRING_NAME                "cert_store"
+
+static debug_info_t *cert_store_dbf;
+static debug_info_t *cert_store_hexdump;
+
+#define pr_dbf_msg(fmt, ...) \
+       debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__)
+
+enum diag320_subcode {
+       DIAG320_SUBCODES        = 0,
+       DIAG320_STORAGE         = 1,
+       DIAG320_CERT_BLOCK      = 2,
+};
+
+enum diag320_rc {
+       DIAG320_RC_OK           = 0x0001,
+       DIAG320_RC_CS_NOMATCH   = 0x0306,
+};
+
+/* Verification Certificates Store Support Block (VCSSB). */
+struct vcssb {
+       u32 vcssb_length;
+       u8  pad_0x04[3];
+       u8  version;
+       u8  pad_0x08[8];
+       u32 cs_token;
+       u8  pad_0x14[12];
+       u16 total_vc_index_count;
+       u16 max_vc_index_count;
+       u8  pad_0x24[28];
+       u32 max_vce_length;
+       u32 max_vcxe_length;
+       u8  pad_0x48[8];
+       u32 max_single_vcb_length;
+       u32 total_vcb_length;
+       u32 max_single_vcxb_length;
+       u32 total_vcxb_length;
+       u8  pad_0x60[32];
+} __packed __aligned(8);
+
+/* Verification Certificate Entry (VCE) Header. */
+struct vce_header {
+       u32 vce_length;
+       u8  flags;
+       u8  key_type;
+       u16 vc_index;
+       u8  vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */
+       u8  vc_format;
+       u8  pad_0x49;
+       u16 key_id_length;
+       u8  pad_0x4c;
+       u8  vc_hash_type;
+       u16 vc_hash_length;
+       u8  pad_0x50[4];
+       u32 vc_length;
+       u8  pad_0x58[8];
+       u16 vc_hash_offset;
+       u16 vc_offset;
+       u8  pad_0x64[28];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB) Header. */
+struct vcb_header {
+       u32 vcb_input_length;
+       u8  pad_0x04[4];
+       u16 first_vc_index;
+       u16 last_vc_index;
+       u32 pad_0x0c;
+       u32 cs_token;
+       u8  pad_0x14[12];
+       u32 vcb_output_length;
+       u8  pad_0x24[3];
+       u8  version;
+       u16 stored_vc_count;
+       u16 remaining_vc_count;
+       u8  pad_0x2c[20];
+} __packed __aligned(4);
+
+/* Verification Certificate Block (VCB). */
+struct vcb {
+       struct vcb_header vcb_hdr;
+       u8 vcb_buf[];
+} __packed __aligned(4);
+
+/* Verification Certificate Entry (VCE). */
+struct vce {
+       struct vce_header vce_hdr;
+       u8 cert_data_buf[];
+} __packed __aligned(4);
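+
+/*
+ * How the structures relate, as used below: DIAG320 subcode 2 fills a
+ * VCB whose vcb_buf carries VCEs back to back.  The requests issued by
+ * this driver always ask for single-entry VCBs (first_vc_index ==
+ * last_vc_index), so vcb_buf holds at most one VCE here.
+ */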
+
+static void cert_store_key_describe(const struct key *key, struct seq_file *m)
+{
+       char ascii[VC_NAME_LEN_BYTES + 1];
+
+       /*
+        * First 64 bytes of the key description is key name in EBCDIC CP 500.
+        * Convert it to ASCII for displaying in /proc/keys.
+        */
+       strscpy(ascii, key->description, sizeof(ascii));
+       EBCASC_500(ascii, VC_NAME_LEN_BYTES);
+       seq_puts(m, ascii);
+
+       seq_puts(m, &key->description[VC_NAME_LEN_BYTES]);
+       if (key_is_positive(key))
+               seq_printf(m, ": %u", key->datalen);
+}
+
+/*
+ * Certificate store key type takes over properties of
+ * user key but cannot be updated.
+ */
+static struct key_type key_type_cert_store_key = {
+       .name           = CERT_STORE_KEY_TYPE_NAME,
+       .preparse       = user_preparse,
+       .free_preparse  = user_free_preparse,
+       .instantiate    = generic_key_instantiate,
+       .revoke         = user_revoke,
+       .destroy        = user_destroy,
+       .describe       = cert_store_key_describe,
+       .read           = user_read,
+};
+
+/* Logging functions. */
+static void pr_dbf_vcb(const struct vcb *b)
+{
+       pr_dbf_msg("VCB Header:");
+       pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length);
+       pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index);
+       pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index);
+       pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token);
+       pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length);
+       pr_dbf_msg("version: %d", b->vcb_hdr.version);
+       pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count);
+       pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count);
+}
+
+static void pr_dbf_vce(const struct vce *e)
+{
+       unsigned char vc_name[VC_NAME_LEN_BYTES + 1];
+       char log_string[VC_NAME_LEN_BYTES + 40];
+
+       pr_dbf_msg("VCE Header:");
+       pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length);
+       pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags);
+       pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type);
+       pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index);
+       pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format);
+       pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length);
+       pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type);
+       pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length);
+       pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset);
+       pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length);
+       pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset);
+
+       /* Certificate name in ASCII. */
+       memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES);
+       EBCASC_500(vc_name, VC_NAME_LEN_BYTES);
+       vc_name[VC_NAME_LEN_BYTES] = '\0';
+
+       snprintf(log_string, sizeof(log_string),
+                "index: %d vce_hdr.vc_name (ASCII): %s",
+                e->vce_hdr.vc_index, vc_name);
+       debug_text_event(cert_store_hexdump, 3, log_string);
+
+       /* Certificate data. */
+       debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start");
+       debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128);
+       debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end");
+       debug_event(cert_store_hexdump, 3,
+                   (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128);
+}
+
+static void pr_dbf_vcssb(const struct vcssb *s)
+{
+       debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1");
+       debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES);
+
+       pr_dbf_msg("VCSSB:");
+       pr_dbf_msg("vcssb_length: %u", s->vcssb_length);
+       pr_dbf_msg("version: %u", s->version);
+       pr_dbf_msg("cs_token: %u", s->cs_token);
+       pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count);
+       pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count);
+       pr_dbf_msg("max_vce_length: %u", s->max_vce_length);
+       pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length);
+       pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length);
+       pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length);
+       pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length);
+       pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length);
+}
+
+static int __diag320(unsigned long subcode, void *addr)
+{
+       union register_pair rp = { .even = (unsigned long)addr, };
+
+       asm volatile(
+               "       diag    %[rp],%[subcode],0x320\n"
+               "0:     nopr    %%r7\n"
+               EX_TABLE(0b, 0b)
+               : [rp] "+d" (rp.pair)
+               : [subcode] "d" (subcode)
+               : "cc", "memory");
+
+       return rp.odd;
+}
+
+static int diag320(unsigned long subcode, void *addr)
+{
+       diag_stat_inc(DIAG_STAT_X320);
+
+       return __diag320(subcode, addr);
+}
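+
+/*
+ * The response code comes back in the odd register of the pair; e.g.
+ * diag320(DIAG320_SUBCODES, ism) fills "ism" with the installed-subcode
+ * mask and returns DIAG320_RC_OK on success (see
+ * query_diag320_subcodes() below).
+ */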
+
+/*
+ * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in
+ * VCE. Return -EINVAL if hashes don't match.
+ */
+static int check_certificate_hash(const struct vce *vce)
+{
+       u8 hash[SHA256_DIGEST_SIZE];
+       u16 vc_hash_length;
+       u8 *vce_hash;
+
+       vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset;
+       vc_hash_length = vce->vce_hdr.vc_hash_length;
+       sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash);
+       if (memcmp(vce_hash, hash, vc_hash_length) == 0)
+               return 0;
+
+       pr_dbf_msg("SHA256 hash of received certificate does not match");
+       debug_text_event(cert_store_hexdump, 3, "VCE hash:");
+       debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE);
+       debug_text_event(cert_store_hexdump, 3, "Calculated hash:");
+       debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE);
+
+       return -EINVAL;
+}
+
+static int check_certificate_valid(const struct vce *vce)
+{
+       if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) {
+               pr_dbf_msg("Certificate entry is invalid");
+               return -EINVAL;
+       }
+       if (vce->vce_hdr.vc_format != 1) {
+               pr_dbf_msg("Certificate format is not supported");
+               return -EINVAL;
+       }
+       if (vce->vce_hdr.vc_hash_type != 1) {
+               pr_dbf_msg("Hash type is not supported");
+               return -EINVAL;
+       }
+
+       return check_certificate_hash(vce);
+}
+
+static struct key *get_user_session_keyring(void)
+{
+       key_ref_t us_keyring_ref;
+
+       us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING,
+                                        KEY_LOOKUP_CREATE, KEY_NEED_LINK);
+       if (IS_ERR(us_keyring_ref)) {
+               pr_dbf_msg("Couldn't get user session keyring: %ld",
+                          PTR_ERR(us_keyring_ref));
+               return ERR_PTR(-ENOKEY);
+       }
+       key_ref_put(us_keyring_ref);
+       return key_ref_to_ptr(us_keyring_ref);
+}
+
+/* Invalidate all keys from cert_store keyring. */
+static int invalidate_keyring_keys(struct key *keyring)
+{
+       unsigned long num_keys, key_index;
+       size_t keyring_payload_len;
+       key_serial_t *key_array;
+       struct key *current_key;
+       int rc;
+
+       keyring_payload_len = key_type_keyring.read(keyring, NULL, 0);
+       num_keys = keyring_payload_len / sizeof(key_serial_t);
+       key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL);
+       if (!key_array)
+               return -ENOMEM;
+
+       rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len);
+       if (rc != keyring_payload_len) {
+               pr_dbf_msg("Couldn't read keyring payload");
+               goto out;
+       }
+
+       for (key_index = 0; key_index < num_keys; key_index++) {
+               current_key = key_lookup(key_array[key_index]);
+               pr_dbf_msg("Invalidating key %08x", current_key->serial);
+
+               key_invalidate(current_key);
+               key_put(current_key);
+               rc = key_unlink(keyring, current_key);
+               if (rc) {
+                       pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc);
+                       break;
+               }
+       }
+out:
+       kfree(key_array);
+       return rc;
+}
+
+static struct key *find_cs_keyring(void)
+{
+       key_ref_t cs_keyring_ref;
+       struct key *cs_keyring;
+
+       cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true),
+                                       &key_type_keyring, CERT_STORE_KEYRING_NAME,
+                                       false);
+       if (!IS_ERR(cs_keyring_ref)) {
+               cs_keyring = key_ref_to_ptr(cs_keyring_ref);
+               key_ref_put(cs_keyring_ref);
+               goto found;
+       }
+       /* Search default locations: thread, process, session keyrings */
+       cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL);
+       if (IS_ERR(cs_keyring))
+               return NULL;
+       key_put(cs_keyring);
+found:
+       return cs_keyring;
+}
+
+static void cleanup_cs_keys(void)
+{
+       struct key *cs_keyring;
+
+       cs_keyring = find_cs_keyring();
+       if (!cs_keyring)
+               return;
+
+       pr_dbf_msg("Found cert_store keyring. Purging...");
+       /*
+        * Remove cert_store_key_type in case invalidation
+        * of old cert_store keys failed (= severe error).
+        */
+       if (invalidate_keyring_keys(cs_keyring))
+               unregister_key_type(&key_type_cert_store_key);
+
+       keyring_clear(cs_keyring);
+       key_invalidate(cs_keyring);
+       key_put(cs_keyring);
+       key_unlink(get_user_session_keyring(), cs_keyring);
+}
+
+static struct key *create_cs_keyring(void)
+{
+       static struct key *cs_keyring;
+
+       /* Cleanup previous cs_keyring and all associated keys if any. */
+       cleanup_cs_keys();
+       cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID,
+                                  GLOBAL_ROOT_GID, current_cred(),
+                                  (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ,
+                                  KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP,
+                                  NULL, get_user_session_keyring());
+       if (IS_ERR(cs_keyring)) {
+               pr_dbf_msg("Can't allocate cert_store keyring");
+               return NULL;
+       }
+
+       pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial);
+
+       /*
+        * In case a previous clean-up ran into an
+        * error and unregistered key type.
+        */
+       register_key_type(&key_type_cert_store_key);
+
+       return cs_keyring;
+}
+
+/*
+ * Allocate memory and create key description in format
+ * [key name in EBCDIC]:[VCE index]:[CS token].
+ * Return a pointer to key description or NULL if memory
+ * allocation failed. Memory should be freed by caller.
+ */
+static char *get_key_description(struct vcssb *vcssb, const struct vce *vce)
+{
+       size_t len, name_len;
+       u32 cs_token;
+       char *desc;
+
+       cs_token = vcssb->cs_token;
+       /* Description string contains "%64s:%04u:%08u\0". */
+       name_len = sizeof(vce->vce_hdr.vc_name);
+       len = name_len + 1 + 4 + 1 + 8 + 1;
+       desc = kmalloc(len, GFP_KERNEL);
+       if (!desc)
+               return NULL;
+
+       memcpy(desc, vce->vce_hdr.vc_name, name_len);
+       sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token);
+
+       return desc;
+}
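+
+/*
+ * Hypothetical example: a certificate named "CERT1" (EBCDIC, blank
+ * padded to 64 bytes) at VCE index 1 under CS token 7 gets the
+ * description "CERT1<59 trailing blanks>:0001:00000007"; only the name
+ * part stays in EBCDIC, index and token are ASCII decimal.
+ */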
+
+/*
+ * Create a key of type "cert_store_key" using the data from VCE for key
+ * payload and key description. Link the key to "cert_store" keyring.
+ */
+static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce,
+                              struct key *keyring)
+{
+       key_ref_t newkey;
+       char *desc;
+       int rc;
+
+       desc = get_key_description(vcssb, vce);
+       if (!desc)
+               return -ENOMEM;
+
+       newkey = key_create_or_update(
+               make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME,
+               desc, (u8 *)vce + vce->vce_hdr.vc_offset,
+               vce->vce_hdr.vc_length,
+               (KEY_POS_ALL & ~KEY_POS_SETATTR)  | KEY_USR_VIEW | KEY_USR_READ,
+               KEY_ALLOC_NOT_IN_QUOTA);
+
+       rc = PTR_ERR_OR_ZERO(newkey);
+       if (rc) {
+               pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc);
+               rc = -ENOKEY;
+               goto out;
+       }
+
+       key_ref_put(newkey);
+out:
+       kfree(desc);
+       return rc;
+}
+
+/* Get Verification Certificate Storage Size block with DIAG320 subcode 1. */
+static int get_vcssb(struct vcssb *vcssb)
+{
+       int diag320_rc;
+
+       memset(vcssb, 0, sizeof(*vcssb));
+       vcssb->vcssb_length = VCSSB_LEN_BYTES;
+       diag320_rc = diag320(DIAG320_STORAGE, vcssb);
+       pr_dbf_vcssb(vcssb);
+
+       if (diag320_rc != DIAG320_RC_OK) {
+               pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc);
+               return -EIO;
+       }
+       if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) {
+               pr_dbf_msg("No certificates available for current configuration");
+               return -ENOKEY;
+       }
+
+       return 0;
+}
+
+static u32 get_4k_mult_vcb_size(struct vcssb *vcssb)
+{
+       return round_up(vcssb->max_single_vcb_length, PAGE_SIZE);
+}
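+
+/*
+ * E.g. with 4 KiB pages a max_single_vcb_length of 5000 is rounded up
+ * to 8192, so the VCB passed to DIAG320 subcode 2 always spans whole
+ * pages.
+ */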
+
+/* Fill input fields of single-entry VCB that will be read by LPAR. */
+static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index)
+{
+       memset(vcb, 0, sizeof(*vcb));
+       vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb);
+       vcb->vcb_hdr.cs_token = vcssb->cs_token;
+
+       /* Request single entry. */
+       vcb->vcb_hdr.first_vc_index = index;
+       vcb->vcb_hdr.last_vc_index = index;
+}
+
+static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce)
+{
+       struct vce *extracted_vce;
+
+       extracted_vce = (struct vce *)vcb->vcb_buf;
+       memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length);
+       pr_dbf_vce(vce);
+}
+
+static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb)
+{
+       int rc, diag320_rc;
+
+       fill_vcb_input(vcssb, vcb, index);
+
+       diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb);
+       pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc);
+       pr_dbf_vcb(vcb);
+
+       switch (diag320_rc) {
+       case DIAG320_RC_OK:
+               rc = 0;
+               if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) {
+                       pr_dbf_msg("No certificate entry for index %u", index);
+                       rc = -ENOKEY;
+               } else if (vcb->vcb_hdr.remaining_vc_count != 0) {
+                       /* Retry on insufficient space. */
+                       pr_dbf_msg("Couldn't get all requested certificates");
+                       rc = -EAGAIN;
+               }
+               break;
+       case DIAG320_RC_CS_NOMATCH:
+               pr_dbf_msg("Certificate Store token mismatch");
+               rc = -EAGAIN;
+               break;
+       default:
+               pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc);
+               rc = -EINVAL;
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call,
+ * extract VCE and create a key from its certificate.
+ */
+static int create_key_from_sevcb(struct vcssb *vcssb, u16 index,
+                                struct key *keyring)
+{
+       struct vcb *vcb;
+       struct vce *vce;
+       int rc;
+
+       rc = -ENOMEM;
+       vcb = vmalloc(get_4k_mult_vcb_size(vcssb));
+       vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr));
+       if (!vcb || !vce)
+               goto out;
+
+       rc = get_sevcb(vcssb, index, vcb);
+       if (rc)
+               goto out;
+
+       extract_vce_from_sevcb(vcb, vce);
+       rc = check_certificate_valid(vce);
+       if (rc)
+               goto out;
+
+       rc = create_key_from_vce(vcssb, vce, keyring);
+       if (rc)
+               goto out;
+
+       pr_dbf_msg("Successfully created key from Certificate Entry %d", index);
+out:
+       vfree(vce);
+       vfree(vcb);
+       return rc;
+}
+
+/*
+ * Request a single-entry VCB for each VCE available for the partition.
+ * Create a key from it and link it to cert_store keyring. If no keys
+ * could be created (i.e. VCEs were invalid) return -ENOKEY.
+ */
+static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring)
+{
+       int rc, index, count, added;
+
+       count = 0;
+       added = 0;
+       /* Certificate Store entry indices start at 1 and have no gaps. */
+       for (index = 1; index < vcssb->total_vc_index_count + 1; index++) {
+               pr_dbf_msg("Creating key from VCE %u", index);
+               rc = create_key_from_sevcb(vcssb, index, keyring);
+               count++;
+
+               if (rc == -EAGAIN)
+                       return rc;
+
+               if (rc)
+                       pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc);
+               else
+                       added++;
+       }
+
+       if (added == 0) {
+               pr_dbf_msg("Processed %d entries. No keys created", count);
+               return -ENOKEY;
+       }
+
+       pr_info("Added %d of %d keys to cert_store keyring", added, count);
+
+       /*
+        * Do not allow to link more keys to certificate store keyring after all
+        * the VCEs were processed.
+        */
+       rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL);
+       if (rc)
+               pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc);
+
+       return 0;
+}
+
+/*
+ * Check which DIAG320 subcodes are installed.
+ * Return -ENOENT if subcodes 1 or 2 are not available.
+ */
+static int query_diag320_subcodes(void)
+{
+       unsigned long ism[ISM_LEN_DWORDS];
+       int rc;
+
+       rc = diag320(0, ism);
+       if (rc != DIAG320_RC_OK) {
+               pr_dbf_msg("DIAG320 subcode query returned %04x", rc);
+               return -ENOENT;
+       }
+
+       debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0");
+       debug_event(cert_store_hexdump, 3, ism, sizeof(ism));
+
+       if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) {
+               pr_dbf_msg("Not all required DIAG320 subcodes are installed");
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+/*
+ * Check if Certificate Store is supported by the firmware and DIAG320 subcodes
+ * 1 and 2 are installed. Create cert_store keyring and link all certificates
+ * available for the current partition to it as "cert_store_key" type
+ * keys. On refresh or error invalidate cert_store keyring and destroy
+ * all keys of "cert_store_key" type.
+ */
+static int fill_cs_keyring(void)
+{
+       struct key *cs_keyring;
+       struct vcssb *vcssb;
+       int rc;
+
+       rc = -ENOMEM;
+       vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL);
+       if (!vcssb)
+               goto cleanup_keys;
+
+       rc = -ENOENT;
+       if (!sclp.has_diag320) {
+               pr_dbf_msg("Certificate Store is not supported");
+               goto cleanup_keys;
+       }
+
+       rc = query_diag320_subcodes();
+       if (rc)
+               goto cleanup_keys;
+
+       rc = get_vcssb(vcssb);
+       if (rc)
+               goto cleanup_keys;
+
+       rc = -ENOMEM;
+       cs_keyring = create_cs_keyring();
+       if (!cs_keyring)
+               goto cleanup_keys;
+
+       rc = add_certificates_to_keyring(vcssb, cs_keyring);
+       if (rc)
+               goto cleanup_cs_keyring;
+
+       goto out;
+
+cleanup_cs_keyring:
+       key_put(cs_keyring);
+cleanup_keys:
+       cleanup_cs_keys();
+out:
+       kfree(vcssb);
+       return rc;
+}
+
+static DEFINE_MUTEX(cs_refresh_lock);
+static int cs_status_val = -1;
+
+static ssize_t cs_status_show(struct kobject *kobj,
+                             struct kobj_attribute *attr, char *buf)
+{
+       if (cs_status_val == -1)
+               return sysfs_emit(buf, "uninitialized\n");
+       else if (cs_status_val == 0)
+               return sysfs_emit(buf, "ok\n");
+
+       return sysfs_emit(buf, "failed (%d)\n", cs_status_val);
+}
+
+static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status);
+
+static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr,
+                            const char *buf, size_t count)
+{
+       int rc, retries;
+
+       pr_dbf_msg("Refresh certificate store information requested");
+       rc = mutex_lock_interruptible(&cs_refresh_lock);
+       if (rc)
+               return rc;
+
+       for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) {
+               /* Request certificates from certificate store. */
+               rc = fill_cs_keyring();
+               if (rc)
+                       pr_dbf_msg("Failed to refresh certificate store information (%d)", rc);
+               if (rc != -EAGAIN)
+                       break;
+       }
+       cs_status_val = rc;
+       mutex_unlock(&cs_refresh_lock);
+
+       return rc ?: count;
+}
+
+static struct kobj_attribute refresh_attr = __ATTR_WO(refresh);
+
+static const struct attribute *cert_store_attrs[] __initconst = {
+       &cs_status_attr.attr,
+       &refresh_attr.attr,
+       NULL,
+};
+
+static struct kobject *cert_store_kobj;
+
+static int __init cert_store_init(void)
+{
+       int rc = -ENOMEM;
+
+       cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64);
+       if (!cert_store_dbf)
+               goto cleanup_dbf;
+
+       cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128);
+       if (!cert_store_hexdump)
+               goto cleanup_dbf;
+
+       debug_register_view(cert_store_hexdump, &debug_hex_ascii_view);
+       debug_register_view(cert_store_dbf, &debug_sprintf_view);
+
+       /* Create directory /sys/firmware/cert_store. */
+       cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj);
+       if (!cert_store_kobj)
+               goto cleanup_dbf;
+
+       rc = sysfs_create_files(cert_store_kobj, cert_store_attrs);
+       if (rc)
+               goto cleanup_kobj;
+
+       register_key_type(&key_type_cert_store_key);
+
+       return rc;
+
+cleanup_kobj:
+       kobject_put(cert_store_kobj);
+cleanup_dbf:
+       debug_unregister(cert_store_dbf);
+       debug_unregister(cert_store_hexdump);
+
+       return rc;
+}
+device_initcall(cert_store_init);
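
For reference, the attributes registered above surface as /sys/firmware/cert_store/cs_status and /sys/firmware/cert_store/refresh. A minimal user-space sketch of the intended usage (illustrative only, not part of the patch; error handling trimmed):

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char status[64];
            ssize_t n;
            int fd;

            /* Any write triggers a refresh; the written value is ignored. */
            fd = open("/sys/firmware/cert_store/refresh", O_WRONLY);
            if (fd >= 0) {
                    if (write(fd, "1", 1) < 0)
                            perror("refresh");
                    close(fd);
            }

            /* Reads back "uninitialized", "ok" or "failed (<rc>)". */
            fd = open("/sys/firmware/cert_store/cs_status", O_RDONLY);
            if (fd < 0)
                    return 1;
            n = read(fd, status, sizeof(status) - 1);
            if (n > 0) {
                    status[n] = '\0';
                    printf("cert_store status: %s", status);
            }
            close(fd);
            return 0;
    }
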
index 82079f2..f9f06cd 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/cpu.h>
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-extable.h>
 #include <asm/diag.h>
 #include <asm/trace/diag.h>
@@ -50,6 +51,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
        [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" },
        [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" },
        [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" },
+       [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" },
        [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
 };
 
@@ -167,8 +169,29 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad
        return rp.odd;
 }
 
+/**
+ * diag204() - Issue diagnose 204 call.
+ * @subcode: Subcode of diagnose 204 to be executed.
+ * @size: Size of the area in pages which @addr points to, if given.
+ * @addr: Vmalloc'ed memory area where the result is written to.
+ *
+ * Execute diagnose 204 with the given subcode and write the result to the
+ * memory area specified with @addr. For subcodes which do not write a
+ * result to memory both @size and @addr must be zero. If @addr is
+ * specified it must be page aligned and must have been allocated with
+ * vmalloc(). Conversion to real / physical addresses will be handled by
+ * this function if required.
+ */
 int diag204(unsigned long subcode, unsigned long size, void *addr)
 {
+       if (addr) {
+               if (WARN_ON_ONCE(!is_vmalloc_addr(addr)))
+                       return -1;
+               if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE)))
+                       return -1;
+       }
+       if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4)
+               addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr));
        diag_stat_inc(DIAG_STAT_X204);
        size = __diag204(&subcode, size, addr);
        if (subcode)
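
A minimal sketch of a caller honoring the contract documented above (hypothetical helper, not from this series): the buffer comes from vmalloc(), which is page aligned by construction, and the size is given in pages.

    #include <linux/vmalloc.h>
    #include <asm/diag.h>

    static int diag204_call_example(unsigned long subcode, unsigned long pages)
    {
            void *buf;
            int rc;

            buf = vmalloc(pages * PAGE_SIZE);  /* vmalloc memory is page aligned */
            if (!buf)
                    return -ENOMEM;
            /* Conversion to a real/physical address happens inside diag204(). */
            rc = diag204(subcode, pages, buf);
            if (rc >= 0) {
                    /* ... evaluate the response written to buf ... */
            }
            vfree(buf);
            return rc;
    }
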
@@ -200,7 +223,7 @@ int diag210(struct diag210 *addr)
 EXPORT_SYMBOL(diag210);
 
 /*
- * Diagnose 210: Get information about a virtual device
+ * Diagnose 8C: Access 3270 Display Device Information
  */
 int diag8c(struct diag8c *addr, struct ccw_dev_id *devno)
 {
index 7f8246c..0e51fa5 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- *    ECBDIC -> ASCII, ASCII -> ECBDIC,
+ *    EBCDIC -> ASCII, ASCII -> EBCDIC,
  *    upper to lower case (EBCDIC) conversion tables.
  *
  *  S390 version
index a660f4b..49a11f6 100644 (file)
@@ -8,6 +8,7 @@
  *              Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
  */
 
+#include <linux/export.h>
 #include <linux/init.h>
 #include <linux/linkage.h>
 #include <asm/asm-extable.h>
@@ -26,7 +27,6 @@
 #include <asm/vx-insn.h>
 #include <asm/setup.h>
 #include <asm/nmi.h>
-#include <asm/export.h>
 #include <asm/nospec-insn.h>
 
 _LPP_OFFSET    = __LC_LPP
index 85a00d9..05e5166 100644 (file)
@@ -266,7 +266,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,        \
                struct kobj_attribute *attr,                            \
                const char *buf, size_t len)                            \
 {                                                                      \
-       strncpy(_value, buf, sizeof(_value) - 1);                       \
+       strscpy(_value, buf, sizeof(_value));                           \
        strim(_value);                                                  \
        return len;                                                     \
 }                                                                      \
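
This strncpy() to strscpy() conversion recurs throughout the series (see also the um hunks further below); a standalone sketch of the difference, under the usual kernel string API semantics:

    #include <linux/string.h>
    #include <linux/errno.h>

    static void copy_example(char *dst, size_t dstsize, const char *src)
    {
            /*
             * strncpy() does not NUL-terminate on truncation and gives no
             * indication that truncation happened; callers had to reserve
             * the final byte by hand, as the old code above did.
             */
            strncpy(dst, src, dstsize - 1);

            /*
             * strscpy() always NUL-terminates and returns the number of
             * bytes copied, or -E2BIG if src had to be truncated.
             */
            if (strscpy(dst, src, dstsize) == -E2BIG)
                    return; /* truncation detected, handle if needed */
    }
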
@@ -557,15 +557,12 @@ static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
        __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
 
 static struct attribute *ipl_fcp_attrs[] = {
-       &sys_ipl_type_attr.attr,
        &sys_ipl_device_attr.attr,
        &sys_ipl_fcp_wwpn_attr.attr,
        &sys_ipl_fcp_lun_attr.attr,
        &sys_ipl_fcp_bootprog_attr.attr,
        &sys_ipl_fcp_br_lba_attr.attr,
        &sys_ipl_ccw_loadparm_attr.attr,
-       &sys_ipl_secure_attr.attr,
-       &sys_ipl_has_secure_attr.attr,
        NULL,
 };
 
@@ -575,14 +572,11 @@ static struct attribute_group ipl_fcp_attr_group = {
 };
 
 static struct attribute *ipl_nvme_attrs[] = {
-       &sys_ipl_type_attr.attr,
        &sys_ipl_nvme_fid_attr.attr,
        &sys_ipl_nvme_nsid_attr.attr,
        &sys_ipl_nvme_bootprog_attr.attr,
        &sys_ipl_nvme_br_lba_attr.attr,
        &sys_ipl_ccw_loadparm_attr.attr,
-       &sys_ipl_secure_attr.attr,
-       &sys_ipl_has_secure_attr.attr,
        NULL,
 };
 
@@ -592,13 +586,10 @@ static struct attribute_group ipl_nvme_attr_group = {
 };
 
 static struct attribute *ipl_eckd_attrs[] = {
-       &sys_ipl_type_attr.attr,
        &sys_ipl_eckd_bootprog_attr.attr,
        &sys_ipl_eckd_br_chr_attr.attr,
        &sys_ipl_ccw_loadparm_attr.attr,
        &sys_ipl_device_attr.attr,
-       &sys_ipl_secure_attr.attr,
-       &sys_ipl_has_secure_attr.attr,
        NULL,
 };
 
@@ -610,21 +601,15 @@ static struct attribute_group ipl_eckd_attr_group = {
 /* CCW ipl device attributes */
 
 static struct attribute *ipl_ccw_attrs_vm[] = {
-       &sys_ipl_type_attr.attr,
        &sys_ipl_device_attr.attr,
        &sys_ipl_ccw_loadparm_attr.attr,
        &sys_ipl_vm_parm_attr.attr,
-       &sys_ipl_secure_attr.attr,
-       &sys_ipl_has_secure_attr.attr,
        NULL,
 };
 
 static struct attribute *ipl_ccw_attrs_lpar[] = {
-       &sys_ipl_type_attr.attr,
        &sys_ipl_device_attr.attr,
        &sys_ipl_ccw_loadparm_attr.attr,
-       &sys_ipl_secure_attr.attr,
-       &sys_ipl_has_secure_attr.attr,
        NULL,
 };
 
@@ -636,15 +621,15 @@ static struct attribute_group ipl_ccw_attr_group_lpar = {
        .attrs = ipl_ccw_attrs_lpar
 };
 
-/* UNKNOWN ipl device attributes */
-
-static struct attribute *ipl_unknown_attrs[] = {
+static struct attribute *ipl_common_attrs[] = {
        &sys_ipl_type_attr.attr,
+       &sys_ipl_secure_attr.attr,
+       &sys_ipl_has_secure_attr.attr,
        NULL,
 };
 
-static struct attribute_group ipl_unknown_attr_group = {
-       .attrs = ipl_unknown_attrs,
+static struct attribute_group ipl_common_attr_group = {
+       .attrs = ipl_common_attrs,
 };
 
 static struct kset *ipl_kset;
@@ -668,6 +653,9 @@ static int __init ipl_init(void)
                rc = -ENOMEM;
                goto out;
        }
+       rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group);
+       if (rc)
+               goto out;
        switch (ipl_info.type) {
        case IPL_TYPE_CCW:
                if (MACHINE_IS_VM)
@@ -689,8 +677,6 @@ static int __init ipl_init(void)
                rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group);
                break;
        default:
-               rc = sysfs_create_group(&ipl_kset->kobj,
-                                       &ipl_unknown_attr_group);
                break;
        }
 out:
index 6d9276c..12a2bd4 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/reboot.h>
 #include <linux/ftrace.h>
 #include <linux/debug_locks.h>
+#include <asm/pfault.h>
 #include <asm/cio.h>
 #include <asm/setup.h>
 #include <asm/smp.h>
index 2df94d3..8d207b8 100644 (file)
@@ -188,7 +188,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
        data->memsz = ALIGN(data->memsz, PAGE_SIZE);
        buf.mem = data->memsz;
 
-       ptr = (void *)ipl_cert_list_addr;
+       ptr = __va(ipl_cert_list_addr);
        end = ptr + ipl_cert_list_size;
        ncerts = 0;
        while (ptr < end) {
@@ -200,7 +200,7 @@ static int kexec_file_add_ipl_report(struct kimage *image,
 
        addr = data->memsz + data->report->size;
        addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
-       ptr = (void *)ipl_cert_list_addr;
+       ptr = __va(ipl_cert_list_addr);
        while (ptr < end) {
                len = *(unsigned int *)ptr;
                ptr += sizeof(len);
index dbece28..ae4d4fd 100644 (file)
@@ -9,15 +9,20 @@
 #include <asm/ftrace.h>
 #include <asm/nospec-insn.h>
 #include <asm/ptrace.h>
-#include <asm/export.h>
 
+#define STACK_FRAME_SIZE_PTREGS                (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS                   (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS              (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW               (STACK_PTREGS + __PT_PSW)
+
+#define STACK_FRAME_SIZE_FREGS         (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE)
+#define STACK_FREGS                    (STACK_FRAME_OVERHEAD)
+#define STACK_FREGS_PTREGS             (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS)
+#define STACK_FREGS_PTREGS_GPRS                (STACK_FREGS_PTREGS + __PT_GPRS)
+#define STACK_FREGS_PTREGS_PSW         (STACK_FREGS_PTREGS + __PT_PSW)
+#define STACK_FREGS_PTREGS_ORIG_GPR2   (STACK_FREGS_PTREGS + __PT_ORIG_GPR2)
+#define STACK_FREGS_PTREGS_FLAGS       (STACK_FREGS_PTREGS + __PT_FLAGS)
 
-#define STACK_FRAME_SIZE       (STACK_FRAME_OVERHEAD + __PT_SIZE)
-#define STACK_PTREGS           (STACK_FRAME_OVERHEAD)
-#define STACK_PTREGS_GPRS      (STACK_PTREGS + __PT_GPRS)
-#define STACK_PTREGS_PSW       (STACK_PTREGS + __PT_PSW)
-#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2)
-#define STACK_PTREGS_FLAGS     (STACK_PTREGS + __PT_FLAGS)
 /* packed stack: allocate just enough for r14, r15 and backchain */
 #define TRACED_FUNC_FRAME_SIZE 24
 
@@ -53,23 +58,23 @@ SYM_CODE_END(ftrace_stub_direct_tramp)
        stg     %r1,__SF_BACKCHAIN(%r15)
        stg     %r0,(__SF_GPRS+8*8)(%r15)
        stg     %r15,(__SF_GPRS+9*8)(%r15)
-       # allocate pt_regs and stack frame for ftrace_trace_function
-       aghi    %r15,-STACK_FRAME_SIZE
-       stg     %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
-       xc      STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15)
+       # allocate ftrace_regs and stack frame for ftrace_trace_function
+       aghi    %r15,-STACK_FRAME_SIZE_FREGS
+       stg     %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15)
+       xc      STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
 
        .if \allregs == 1
-       stg     %r14,(STACK_PTREGS_PSW)(%r15)
-       mvghi   STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
+       stg     %r14,(STACK_FREGS_PTREGS_PSW)(%r15)
+       mvghi   STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS
        .else
-       xc      STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15)
+       xc      STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15)
        .endif
 
        lg      %r14,(__SF_GPRS+8*8)(%r1)       # restore original return address
        aghi    %r1,-TRACED_FUNC_FRAME_SIZE
        stg     %r1,__SF_BACKCHAIN(%r15)
-       stg     %r0,(STACK_PTREGS_PSW+8)(%r15)
-       stmg    %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+       stg     %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
+       stmg    %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
        .endm
 
 SYM_CODE_START(ftrace_regs_caller)
@@ -96,30 +101,30 @@ SYM_CODE_START(ftrace_common)
        lg      %r1,0(%r1)
 #endif
        lgr     %r3,%r14
-       la      %r5,STACK_PTREGS(%r15)
+       la      %r5,STACK_FREGS(%r15)
        BASR_EX %r14,%r1
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 # The j instruction gets runtime patched to a nop instruction.
 # See ftrace_enable_ftrace_graph_caller.
 SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL)
        j       .Lftrace_graph_caller_end
-       lmg     %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15)
-       lg      %r4,(STACK_PTREGS_PSW+8)(%r15)
+       lmg     %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
+       lg      %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15)
        brasl   %r14,prepare_ftrace_return
-       stg     %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+       stg     %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15)
 .Lftrace_graph_caller_end:
 #endif
-       lg      %r0,(STACK_PTREGS_PSW+8)(%r15)
+       lg      %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15)
 #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
-       ltg     %r1,STACK_PTREGS_ORIG_GPR2(%r15)
+       ltg     %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
        locgrz  %r1,%r0
 #else
-       lg      %r1,STACK_PTREGS_ORIG_GPR2(%r15)
+       lg      %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15)
        ltgr    %r1,%r1
        jnz     0f
        lgr     %r1,%r0
 #endif
-0:     lmg     %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+0:     lmg     %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15)
        BR_EX   %r1
 SYM_CODE_END(ftrace_common)
 
@@ -128,10 +133,14 @@ SYM_CODE_END(ftrace_common)
 SYM_FUNC_START(return_to_handler)
        stmg    %r2,%r5,32(%r15)
        lgr     %r1,%r15
-       aghi    %r15,-STACK_FRAME_OVERHEAD
+       aghi    %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE)
        stg     %r1,__SF_BACKCHAIN(%r15)
+       la      %r3,STACK_FRAME_OVERHEAD(%r15)
+       stg     %r1,__FGRAPH_RET_FP(%r3)
+       stg     %r2,__FGRAPH_RET_GPR2(%r3)
+       lgr     %r2,%r3
        brasl   %r14,ftrace_return_to_handler
-       aghi    %r15,STACK_FRAME_OVERHEAD
+       aghi    %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE
        lgr     %r14,%r2
        lmg     %r2,%r5,32(%r15)
        BR_EX   %r14
@@ -160,11 +169,11 @@ SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl)
 
 SYM_CODE_START(arch_rethook_trampoline)
        stg     %r14,(__SF_GPRS+8*8)(%r15)
-       lay     %r15,-STACK_FRAME_SIZE(%r15)
+       lay     %r15,-STACK_FRAME_SIZE_PTREGS(%r15)
        stmg    %r0,%r14,STACK_PTREGS_GPRS(%r15)
 
        # store original stack pointer in backchain and pt_regs
-       lay     %r7,STACK_FRAME_SIZE(%r15)
+       lay     %r7,STACK_FRAME_SIZE_PTREGS(%r15)
        stg     %r7,__SF_BACKCHAIN(%r15)
        stg     %r7,STACK_PTREGS_GPRS+(15*8)(%r15)
 
index 00d7644..c744104 100644 (file)
@@ -146,6 +146,7 @@ static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
 static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
 
 int __bootdata(noexec_disabled);
+unsigned long __bootdata_preserved(max_mappable);
 unsigned long __bootdata(ident_map_size);
 struct physmem_info __bootdata(physmem_info);
 
@@ -874,7 +875,7 @@ static void __init log_component_list(void)
                pr_info("Linux is running with Secure-IPL enabled\n");
        else
                pr_info("Linux is running with Secure-IPL disabled\n");
-       ptr = (void *) early_ipl_comp_list_addr;
+       ptr = __va(early_ipl_comp_list_addr);
        end = (void *) ptr + early_ipl_comp_list_size;
        pr_info("The IPL report contains the following components:\n");
        while (ptr < end) {
index f9a2b75..a4edb7e 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/crash_dump.h>
 #include <linux/kprobes.h>
 #include <asm/asm-offsets.h>
+#include <asm/pfault.h>
 #include <asm/diag.h>
 #include <asm/switch_to.h>
 #include <asm/facility.h>
@@ -252,8 +253,9 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
 
 static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
 {
-       struct lowcore *lc = lowcore_ptr[cpu];
+       struct lowcore *lc, *abs_lc;
 
+       lc = lowcore_ptr[cpu];
        cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
        cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
        lc->cpu_nr = cpu;
@@ -266,7 +268,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
        lc->machine_flags = S390_lowcore.machine_flags;
        lc->user_timer = lc->system_timer =
                lc->steal_timer = lc->avg_steal_timer = 0;
-       __ctl_store(lc->cregs_save_area, 0, 15);
+       abs_lc = get_abs_lowcore();
+       memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area));
+       put_abs_lowcore(abs_lc);
        lc->cregs_save_area[1] = lc->kernel_asce;
        lc->cregs_save_area[7] = lc->user_asce;
        save_access_regs((unsigned int *) lc->access_regs_save_area);
@@ -606,8 +610,8 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set)
        ctlreg = (ctlreg & parms.andval) | parms.orval;
        abs_lc->cregs_save_area[cr] = ctlreg;
        put_abs_lowcore(abs_lc);
-       spin_unlock(&ctl_lock);
        on_each_cpu(smp_ctl_bit_callback, &parms, 1);
+       spin_unlock(&ctl_lock);
 }
 EXPORT_SYMBOL(smp_ctl_set_clear_bit);
 
@@ -927,12 +931,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
        rc = pcpu_alloc_lowcore(pcpu, cpu);
        if (rc)
                return rc;
+       /*
+        * Make sure global control register contents do not change
+        * until the new CPU has initialized its control registers.
+        */
+       spin_lock(&ctl_lock);
        pcpu_prepare_secondary(pcpu, cpu);
        pcpu_attach_task(pcpu, tidle);
        pcpu_start_fn(pcpu, smp_start_secondary, NULL);
        /* Wait until cpu puts itself in the online & active maps */
        while (!cpu_online(cpu))
                cpu_relax();
+       spin_unlock(&ctl_lock);
        return 0;
 }
 
index 4d141e2..30bb204 100644 (file)
@@ -317,7 +317,9 @@ static void fill_diag(struct sthyi_sctns *sctns)
        if (pages <= 0)
                return;
 
-       diag204_buf = vmalloc(array_size(pages, PAGE_SIZE));
+       diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
+                                    PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+                                    __builtin_return_address(0));
        if (!diag204_buf)
                return;
 
@@ -459,9 +461,9 @@ static int sthyi_update_cache(u64 *rc)
  *
  * Fills the destination with system information returned by the STHYI
  * instruction. The data is generated by emulation or execution of STHYI,
- * if available. The return value is the condition code that would be
- * returned, the rc parameter is the return code which is passed in
- * register R2 + 1.
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned; the rc parameter is the
+ * return code which is passed in register R2 + 1.
  */
 int sthyi_fill(void *dst, u64 *rc)
 {
index a6935af..0122cc1 100644 (file)
 449  common    futex_waitv             sys_futex_waitv                 sys_futex_waitv
 450  common    set_mempolicy_home_node sys_set_mempolicy_home_node     sys_set_mempolicy_home_node
 451  common    cachestat               sys_cachestat                   sys_cachestat
+452  common    fchmodat2               sys_fchmodat2                   sys_fchmodat2
index 66f0eb1..b771f1b 100644 (file)
@@ -88,7 +88,7 @@ fail:
  * Requests the Ultravisor to pin the page in the shared state. This will
  * cause an intercept when the guest attempts to unshare the pinned page.
  */
-static int uv_pin_shared(unsigned long paddr)
+int uv_pin_shared(unsigned long paddr)
 {
        struct uv_cb_cfs uvcb = {
                .header.cmd = UVC_CMD_PIN_PAGE_SHARED,
@@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr)
                return -EINVAL;
        return 0;
 }
+EXPORT_SYMBOL_GPL(uv_pin_shared);
 
 /*
  * Requests the Ultravisor to destroy a guest page and make it
index 954d39a..341abaf 100644 (file)
@@ -389,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
  */
 int handle_sthyi(struct kvm_vcpu *vcpu)
 {
-       int reg1, reg2, r = 0;
-       u64 code, addr, cc = 0, rc = 0;
+       int reg1, reg2, cc = 0, r = 0;
+       u64 code, addr, rc = 0;
        struct sthyi_sctns *sctns = NULL;
 
        if (!test_kvm_facility(vcpu->kvm, 74))
@@ -421,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
                return -ENOMEM;
 
        cc = sthyi_fill(sctns, &rc);
-
+       if (cc < 0) {
+               free_page((unsigned long)sctns);
+               return cc;
+       }
 out:
        if (!cc) {
                if (kvm_s390_pv_cpu_is_protected(vcpu)) {
index 0261d42..a7ea80c 100644 (file)
@@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu)
        return vcpu->arch.pv.handle;
 }
 
-static inline bool kvm_s390_pv_is_protected(struct kvm *kvm)
-{
-       lockdep_assert_held(&kvm->lock);
-       return !!kvm_s390_pv_get_handle(kvm);
-}
-
-static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
-{
-       lockdep_assert_held(&vcpu->mutex);
-       return !!kvm_s390_pv_cpu_get_handle(vcpu);
-}
-
 /* implemented in interrupt.c */
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
index 2f34c7c..8d3f39a 100644 (file)
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
 
+bool kvm_s390_pv_is_protected(struct kvm *kvm)
+{
+       lockdep_assert_held(&kvm->lock);
+       return !!kvm_s390_pv_get_handle(kvm);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected);
+
+bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu)
+{
+       lockdep_assert_held(&vcpu->mutex);
+       return !!kvm_s390_pv_cpu_get_handle(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected);
+
 /**
  * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
  * be destroyed
@@ -411,8 +425,12 @@ int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
        u16 _rc, _rrc;
        int cc = 0;
 
-       /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */
-       atomic_inc(&kvm->mm->context.protected_count);
+       /*
+        * Nothing to do if the counter was already 0. Otherwise make sure
+        * the counter does not reach 0 before calling s390_uv_destroy_range.
+        */
+       if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
+               return 0;
 
        *rc = 1;
        /* If the current VM is protected, destroy it */
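
The kvm_s390_pv_deinit_cleanup_all() hunk above replaces an unconditional increment with a common kernel idiom; a minimal sketch of the pattern (names hypothetical):

    #include <linux/atomic.h>

    static int teardown_example(atomic_t *protected_count)
    {
            /*
             * Take a reference only while the counter is still non-zero.
             * If it already dropped to 0, teardown has finished elsewhere
             * and there is nothing left to destroy.
             */
            if (!atomic_inc_not_zero(protected_count))
                    return 0;
            /* ... counter is pinned at >= 1 here; destroy safely ... */
            atomic_dec(protected_count);
            return 0;
    }
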
index 5a9a55d..08f60a4 100644 (file)
@@ -5,8 +5,8 @@
  * Copyright IBM Corp. 2012
  */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
-#include <asm/export.h>
 #include <asm/nospec-insn.h>
 
        GEN_BR_THUNK %r14
index de33cf0..96214f5 100644 (file)
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#include <linux/export.h>
 #include <linux/linkage.h>
 #include <asm/nospec-insn.h>
-#include <asm/export.h>
 
        .section .noinstr.text, "ax"
 
index d90db06..352ff52 100644 (file)
@@ -10,3 +10,4 @@ obj-$(CONFIG_CMM)             += cmm.o
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)      += dump_pagetables.o
 obj-$(CONFIG_PGSTE)            += gmap.o
+obj-$(CONFIG_PFAULT)           += pfault.o
index 5300c68..f475153 100644 (file)
@@ -90,7 +90,7 @@ static long cmm_alloc_pages(long nr, long *counter,
                        } else
                                free_page((unsigned long) npa);
                }
-               diag10_range(virt_to_pfn(addr), 1);
+               diag10_range(virt_to_pfn((void *)addr), 1);
                pa->pages[pa->index++] = addr;
                (*counter)++;
                spin_unlock(&cmm_lock);
index ba5f802..afa5db7 100644 (file)
@@ -297,7 +297,7 @@ static int pt_dump_init(void)
        address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
        address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE;
        address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area;
-       address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE;
+       address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE;
        address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
        address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size;
        address_markers[VMALLOC_NR].start_address = VMALLOC_START;
index 1bc42ce..e41869f 100644 (file)
@@ -640,10 +640,13 @@ void segment_warning(int rc, char *seg_name)
                pr_err("There is not enough memory to load or query "
                       "DCSS %s\n", seg_name);
                break;
-       case -ERANGE:
-               pr_err("DCSS %s exceeds the kernel mapping range (%lu) "
-                      "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS);
+       case -ERANGE: {
+               struct range mhp_range = arch_get_mappable_range();
+
+               pr_err("DCSS %s exceeds the kernel mapping range (%llu) "
+                      "and cannot be loaded\n", seg_name, mhp_range.end + 1);
                break;
+       }
        default:
                break;
        }
index dbe8394..b5e1bea 100644 (file)
@@ -43,8 +43,6 @@
 #include "../kernel/entry.h"
 
 #define __FAIL_ADDR_MASK -4096L
-#define __SUBCODE_MASK 0x0600
-#define __PF_RES_FIELD 0x8000000000000000ULL
 
 /*
  * Allocate private vm_fault_reason from top.  Please make sure it won't
@@ -421,6 +419,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
        vma_end_read(vma);
        if (!(fault & VM_FAULT_RETRY)) {
                count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+               if (likely(!(fault & VM_FAULT_ERROR)))
+                       fault = 0;
                goto out;
        }
        count_vm_vma_lock_event(VMA_LOCK_RETRY);
@@ -581,232 +581,6 @@ void do_dat_exception(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(do_dat_exception);
 
-#ifdef CONFIG_PFAULT 
-/*
- * 'pfault' pseudo page faults routines.
- */
-static int pfault_disable;
-
-static int __init nopfault(char *str)
-{
-       pfault_disable = 1;
-       return 1;
-}
-
-__setup("nopfault", nopfault);
-
-struct pfault_refbk {
-       u16 refdiagc;
-       u16 reffcode;
-       u16 refdwlen;
-       u16 refversn;
-       u64 refgaddr;
-       u64 refselmk;
-       u64 refcmpmk;
-       u64 reserved;
-} __attribute__ ((packed, aligned(8)));
-
-static struct pfault_refbk pfault_init_refbk = {
-       .refdiagc = 0x258,
-       .reffcode = 0,
-       .refdwlen = 5,
-       .refversn = 2,
-       .refgaddr = __LC_LPP,
-       .refselmk = 1ULL << 48,
-       .refcmpmk = 1ULL << 48,
-       .reserved = __PF_RES_FIELD
-};
-
-int pfault_init(void)
-{
-        int rc;
-
-       if (pfault_disable)
-               return -1;
-       diag_stat_inc(DIAG_STAT_X258);
-       asm volatile(
-               "       diag    %1,%0,0x258\n"
-               "0:     j       2f\n"
-               "1:     la      %0,8\n"
-               "2:\n"
-               EX_TABLE(0b,1b)
-               : "=d" (rc)
-               : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc");
-        return rc;
-}
-
-static struct pfault_refbk pfault_fini_refbk = {
-       .refdiagc = 0x258,
-       .reffcode = 1,
-       .refdwlen = 5,
-       .refversn = 2,
-};
-
-void pfault_fini(void)
-{
-
-       if (pfault_disable)
-               return;
-       diag_stat_inc(DIAG_STAT_X258);
-       asm volatile(
-               "       diag    %0,0,0x258\n"
-               "0:     nopr    %%r7\n"
-               EX_TABLE(0b,0b)
-               : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc");
-}
-
-static DEFINE_SPINLOCK(pfault_lock);
-static LIST_HEAD(pfault_list);
-
-#define PF_COMPLETE    0x0080
-
-/*
- * The mechanism of our pfault code: if Linux is running as guest, runs a user
- * space process and the user space process accesses a page that the host has
- * paged out we get a pfault interrupt.
- *
- * This allows us, within the guest, to schedule a different process. Without
- * this mechanism the host would have to suspend the whole virtual cpu until
- * the page has been paged in.
- *
- * So when we get such an interrupt then we set the state of the current task
- * to uninterruptible and also set the need_resched flag. Both happens within
- * interrupt context(!). If we later on want to return to user space we
- * recognize the need_resched flag and then call schedule().  It's not very
- * obvious how this works...
- *
- * Of course we have a lot of additional fun with the completion interrupt (->
- * host signals that a page of a process has been paged in and the process can
- * continue to run). This interrupt can arrive on any cpu and, since we have
- * virtual cpus, actually appear before the interrupt that signals that a page
- * is missing.
- */
-static void pfault_interrupt(struct ext_code ext_code,
-                            unsigned int param32, unsigned long param64)
-{
-       struct task_struct *tsk;
-       __u16 subcode;
-       pid_t pid;
-
-       /*
-        * Get the external interruption subcode & pfault initial/completion
-        * signal bit. VM stores this in the 'cpu address' field associated
-        * with the external interrupt.
-        */
-       subcode = ext_code.subcode;
-       if ((subcode & 0xff00) != __SUBCODE_MASK)
-               return;
-       inc_irq_stat(IRQEXT_PFL);
-       /* Get the token (= pid of the affected task). */
-       pid = param64 & LPP_PID_MASK;
-       rcu_read_lock();
-       tsk = find_task_by_pid_ns(pid, &init_pid_ns);
-       if (tsk)
-               get_task_struct(tsk);
-       rcu_read_unlock();
-       if (!tsk)
-               return;
-       spin_lock(&pfault_lock);
-       if (subcode & PF_COMPLETE) {
-               /* signal bit is set -> a page has been swapped in by VM */
-               if (tsk->thread.pfault_wait == 1) {
-                       /* Initial interrupt was faster than the completion
-                        * interrupt. pfault_wait is valid. Set pfault_wait
-                        * back to zero and wake up the process. This can
-                        * safely be done because the task is still sleeping
-                        * and can't produce new pfaults. */
-                       tsk->thread.pfault_wait = 0;
-                       list_del(&tsk->thread.list);
-                       wake_up_process(tsk);
-                       put_task_struct(tsk);
-               } else {
-                       /* Completion interrupt was faster than initial
-                        * interrupt. Set pfault_wait to -1 so the initial
-                        * interrupt doesn't put the task to sleep.
-                        * If the task is not running, ignore the completion
-                        * interrupt since it must be a leftover of a PFAULT
-                        * CANCEL operation which didn't remove all pending
-                        * completion interrupts. */
-                       if (task_is_running(tsk))
-                               tsk->thread.pfault_wait = -1;
-               }
-       } else {
-               /* signal bit not set -> a real page is missing. */
-               if (WARN_ON_ONCE(tsk != current))
-                       goto out;
-               if (tsk->thread.pfault_wait == 1) {
-                       /* Already on the list with a reference: put to sleep */
-                       goto block;
-               } else if (tsk->thread.pfault_wait == -1) {
-                       /* Completion interrupt was faster than the initial
-                        * interrupt (pfault_wait == -1). Set pfault_wait
-                        * back to zero and exit. */
-                       tsk->thread.pfault_wait = 0;
-               } else {
-                       /* Initial interrupt arrived before completion
-                        * interrupt. Let the task sleep.
-                        * An extra task reference is needed since a different
-                        * cpu may set the task state to TASK_RUNNING again
-                        * before the scheduler is reached. */
-                       get_task_struct(tsk);
-                       tsk->thread.pfault_wait = 1;
-                       list_add(&tsk->thread.list, &pfault_list);
-block:
-                       /* Since this must be a userspace fault, there
-                        * is no kernel task state to trample. Rely on the
-                        * return to userspace schedule() to block. */
-                       __set_current_state(TASK_UNINTERRUPTIBLE);
-                       set_tsk_need_resched(tsk);
-                       set_preempt_need_resched();
-               }
-       }
-out:
-       spin_unlock(&pfault_lock);
-       put_task_struct(tsk);
-}
-
-static int pfault_cpu_dead(unsigned int cpu)
-{
-       struct thread_struct *thread, *next;
-       struct task_struct *tsk;
-
-       spin_lock_irq(&pfault_lock);
-       list_for_each_entry_safe(thread, next, &pfault_list, list) {
-               thread->pfault_wait = 0;
-               list_del(&thread->list);
-               tsk = container_of(thread, struct task_struct, thread);
-               wake_up_process(tsk);
-               put_task_struct(tsk);
-       }
-       spin_unlock_irq(&pfault_lock);
-       return 0;
-}
-
-static int __init pfault_irq_init(void)
-{
-       int rc;
-
-       rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
-       if (rc)
-               goto out_extint;
-       rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
-       if (rc)
-               goto out_pfault;
-       irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
-       cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
-                                 NULL, pfault_cpu_dead);
-       return 0;
-
-out_pfault:
-       unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
-out_extint:
-       pfault_disable = 1;
-       return rc;
-}
-early_initcall(pfault_irq_init);
-
-#endif /* CONFIG_PFAULT */
-
 #if IS_ENABLED(CONFIG_PGSTE)
 
 void do_secure_storage_access(struct pt_regs *regs)
index 989ebd0..906a7bf 100644 (file)
@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops thp_split_walk_ops = {
        .pmd_entry      = thp_split_walk_pmd_entry,
+       .walk_lock      = PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
@@ -2565,6 +2566,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops zap_zero_walk_ops = {
        .pmd_entry      = __zap_zero_pages,
+       .walk_lock      = PGWALK_WRLOCK,
 };
 
 /*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
        .hugetlb_entry          = __s390_enable_skey_hugetlb,
        .pte_entry              = __s390_enable_skey_pte,
        .pmd_entry              = __s390_enable_skey_pmd,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 
 static const struct mm_walk_ops reset_cmma_walk_ops = {
        .pte_entry              = __s390_reset_cmma,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
 
 static const struct mm_walk_ops gather_pages_ops = {
        .pte_entry = s390_gather_pages,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -2853,6 +2858,7 @@ int s390_replace_asce(struct gmap *gmap)
        page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
        if (!page)
                return -ENOMEM;
+       page->index = 0;
        table = page_to_virt(page);
        memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
 
index cbe1df1..c805b3e 100644 (file)
@@ -86,11 +86,12 @@ size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count)
        void *chunk;
        pte_t pte;
 
+       BUILD_BUG_ON(MEMCPY_REAL_SIZE != PAGE_SIZE);
        while (count) {
-               phys = src & PAGE_MASK;
-               offset = src & ~PAGE_MASK;
+               phys = src & MEMCPY_REAL_MASK;
+               offset = src & ~MEMCPY_REAL_MASK;
                chunk = (void *)(__memcpy_real_area + offset);
-               len = min(count, PAGE_SIZE - offset);
+               len = min(count, MEMCPY_REAL_SIZE - offset);
                pte = mk_pte_phys(phys, PAGE_KERNEL_RO);
 
                mutex_lock(&memcpy_real_mutex);
diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c
new file mode 100644 (file)
index 0000000..1aac13b
--- /dev/null
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 1999, 2023
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/sched/task.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/irq.h>
+#include <asm/asm-extable.h>
+#include <asm/pfault.h>
+#include <asm/diag.h>
+
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000UL
+
+/*
+ * 'pfault' pseudo page fault routines.
+ */
+static int pfault_disable;
+
+static int __init nopfault(char *str)
+{
+       pfault_disable = 1;
+       return 1;
+}
+early_param("nopfault", nopfault);
+
+struct pfault_refbk {
+       u16 refdiagc;
+       u16 reffcode;
+       u16 refdwlen;
+       u16 refversn;
+       u64 refgaddr;
+       u64 refselmk;
+       u64 refcmpmk;
+       u64 reserved;
+};
+
+static struct pfault_refbk pfault_init_refbk = {
+       .refdiagc = 0x258,
+       .reffcode = 0,
+       .refdwlen = 5,
+       .refversn = 2,
+       .refgaddr = __LC_LPP,
+       .refselmk = 1UL << 48,
+       .refcmpmk = 1UL << 48,
+       .reserved = __PF_RES_FIELD
+};
+
+int __pfault_init(void)
+{
+       int rc = -EOPNOTSUPP;
+
+       if (pfault_disable)
+               return rc;
+       diag_stat_inc(DIAG_STAT_X258);
+       asm volatile(
+               "       diag    %[refbk],%[rc],0x258\n"
+               "0:     nopr    %%r7\n"
+               EX_TABLE(0b, 0b)
+               : [rc] "+d" (rc)
+               : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk)
+               : "cc");
+       return rc;
+}
+
+static struct pfault_refbk pfault_fini_refbk = {
+       .refdiagc = 0x258,
+       .reffcode = 1,
+       .refdwlen = 5,
+       .refversn = 2,
+};
+
+void __pfault_fini(void)
+{
+       if (pfault_disable)
+               return;
+       diag_stat_inc(DIAG_STAT_X258);
+       asm volatile(
+               "       diag    %[refbk],0,0x258\n"
+               "0:     nopr    %%r7\n"
+               EX_TABLE(0b, 0b)
+               :
+               : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk)
+               : "cc");
+}
+
+static DEFINE_SPINLOCK(pfault_lock);
+static LIST_HEAD(pfault_list);
+
+#define PF_COMPLETE    0x0080
+
+/*
+ * The mechanism of our pfault code: if Linux is running as a guest, runs a
+ * user space process, and the user space process accesses a page that the
+ * host has paged out, we get a pfault interrupt.
+ *
+ * This allows us, within the guest, to schedule a different process. Without
+ * this mechanism the host would have to suspend the whole virtual cpu until
+ * the page has been paged in.
+ *
+ * So when we get such an interrupt we set the state of the current task to
+ * uninterruptible and also set the need_resched flag. Both happen within
+ * interrupt context(!). If we later want to return to user space we
+ * recognize the need_resched flag and then call schedule(). It's not very
+ * obvious how this works...
+ *
+ * Of course we have a lot of additional fun with the completion interrupt
+ * (-> the host signals that a page of a process has been paged in and the
+ * process can continue to run). This interrupt can arrive on any cpu and,
+ * since we have virtual cpus, may actually appear before the interrupt that
+ * signals that a page is missing.
+ */
+static void pfault_interrupt(struct ext_code ext_code,
+                            unsigned int param32, unsigned long param64)
+{
+       struct task_struct *tsk;
+       __u16 subcode;
+       pid_t pid;
+
+       /*
+        * Get the external interruption subcode & pfault initial/completion
+        * signal bit. VM stores this in the 'cpu address' field associated
+        * with the external interrupt.
+        */
+       subcode = ext_code.subcode;
+       if ((subcode & 0xff00) != __SUBCODE_MASK)
+               return;
+       inc_irq_stat(IRQEXT_PFL);
+       /* Get the token (= pid of the affected task). */
+       pid = param64 & LPP_PID_MASK;
+       rcu_read_lock();
+       tsk = find_task_by_pid_ns(pid, &init_pid_ns);
+       if (tsk)
+               get_task_struct(tsk);
+       rcu_read_unlock();
+       if (!tsk)
+               return;
+       spin_lock(&pfault_lock);
+       if (subcode & PF_COMPLETE) {
+               /* signal bit is set -> a page has been swapped in by VM */
+               if (tsk->thread.pfault_wait == 1) {
+                       /*
+                        * Initial interrupt was faster than the completion
+                        * interrupt. pfault_wait is valid. Set pfault_wait
+                        * back to zero and wake up the process. This can
+                        * safely be done because the task is still sleeping
+                        * and can't produce new pfaults.
+                        */
+                       tsk->thread.pfault_wait = 0;
+                       list_del(&tsk->thread.list);
+                       wake_up_process(tsk);
+                       put_task_struct(tsk);
+               } else {
+                       /*
+                        * Completion interrupt was faster than initial
+                        * interrupt. Set pfault_wait to -1 so the initial
+                        * interrupt doesn't put the task to sleep.
+                        * If the task is not running, ignore the completion
+                        * interrupt since it must be a leftover of a PFAULT
+                        * CANCEL operation which didn't remove all pending
+                        * completion interrupts.
+                        */
+                       if (task_is_running(tsk))
+                               tsk->thread.pfault_wait = -1;
+               }
+       } else {
+               /* signal bit not set -> a real page is missing. */
+               if (WARN_ON_ONCE(tsk != current))
+                       goto out;
+               if (tsk->thread.pfault_wait == 1) {
+                       /* Already on the list with a reference: put to sleep */
+                       goto block;
+               } else if (tsk->thread.pfault_wait == -1) {
+                       /*
+                        * Completion interrupt was faster than the initial
+                        * interrupt (pfault_wait == -1). Set pfault_wait
+                        * back to zero and exit.
+                        */
+                       tsk->thread.pfault_wait = 0;
+               } else {
+                       /*
+                        * Initial interrupt arrived before completion
+                        * interrupt. Let the task sleep.
+                        * An extra task reference is needed since a different
+                        * cpu may set the task state to TASK_RUNNING again
+                        * before the scheduler is reached.
+                        */
+                       get_task_struct(tsk);
+                       tsk->thread.pfault_wait = 1;
+                       list_add(&tsk->thread.list, &pfault_list);
+block:
+                       /*
+                        * Since this must be a userspace fault, there
+                        * is no kernel task state to trample. Rely on the
+                        * return to userspace schedule() to block.
+                        */
+                       __set_current_state(TASK_UNINTERRUPTIBLE);
+                       set_tsk_need_resched(tsk);
+                       set_preempt_need_resched();
+               }
+       }
+out:
+       spin_unlock(&pfault_lock);
+       put_task_struct(tsk);
+}
+
+static int pfault_cpu_dead(unsigned int cpu)
+{
+       struct thread_struct *thread, *next;
+       struct task_struct *tsk;
+
+       spin_lock_irq(&pfault_lock);
+       list_for_each_entry_safe(thread, next, &pfault_list, list) {
+               thread->pfault_wait = 0;
+               list_del(&thread->list);
+               tsk = container_of(thread, struct task_struct, thread);
+               wake_up_process(tsk);
+               put_task_struct(tsk);
+       }
+       spin_unlock_irq(&pfault_lock);
+       return 0;
+}
+
+static int __init pfault_irq_init(void)
+{
+       int rc;
+
+       rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+       if (rc)
+               goto out_extint;
+       rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP;
+       if (rc)
+               goto out_pfault;
+       irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL);
+       cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead",
+                                 NULL, pfault_cpu_dead);
+       return 0;
+
+out_pfault:
+       unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt);
+out_extint:
+       pfault_disable = 1;
+       return rc;
+}
+early_initcall(pfault_irq_init);
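
As a reading aid (not part of the patch), the three values that pfault_interrupt() drives thread.pfault_wait through could be named as follows:

    /* Hypothetical names; the kernel stores raw values in thread.pfault_wait. */
    enum pfault_state {
            PFAULT_NONE = 0,   /* no pseudo page fault outstanding */
            PFAULT_WAIT = 1,   /* initial interrupt seen: task on pfault_list, sleeping */
            PFAULT_DONE = -1,  /* completion overtook initial: skip the sleep, reset to 0 */
    };
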
index b266492..e44243b 100644 (file)
@@ -36,7 +36,7 @@ static void vmem_free_pages(unsigned long addr, int order)
 {
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
-           WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
+           WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
                return;
        free_pages(addr, order);
 }
@@ -531,7 +531,7 @@ struct range arch_get_mappable_range(void)
        struct range mhp_range;
 
        mhp_range.start = 0;
-       mhp_range.end =  VMEM_MAX_PHYS - 1;
+       mhp_range.end = max_mappable - 1;
        return mhp_range;
 }
 
@@ -763,6 +763,8 @@ void __init vmem_map_init(void)
        if (static_key_enabled(&cpu_has_bear))
                set_memory_nx(0, 1);
        set_memory_nx(PAGE_SIZE, 1);
+       if (debug_pagealloc_enabled())
+               set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
 
        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);
index ee36779..ee90a91 100644 (file)
@@ -666,9 +666,4 @@ static struct miscdevice clp_misc_device = {
        .fops = &clp_misc_fops,
 };
 
-static int __init clp_misc_init(void)
-{
-       return misc_register(&clp_misc_device);
-}
-
-device_initcall(clp_misc_init);
+builtin_misc_device(clp_misc_device);
index cc06e4c..0eec82f 100644 (file)
@@ -108,13 +108,13 @@ int systemasic_irq_demux(int irq)
        __u32 j, bit;
 
        switch (irq) {
-       case 13:
+       case 13 + 16:
                level = 0;
                break;
-       case 11:
+       case 11 + 16:
                level = 1;
                break;
-       case  9:
+       case 9 + 16:
                level = 2;
                break;
        default:
index 533393d..0156566 100644 (file)
@@ -389,10 +389,10 @@ static unsigned char irl2irq[HL_NR_IRL];
 
 static int highlander_irq_demux(int irq)
 {
-       if (irq >= HL_NR_IRL || irq < 0 || !irl2irq[irq])
+       if (irq >= HL_NR_IRL + 16 || irq < 16 || !irl2irq[irq - 16])
                return irq;
 
-       return irl2irq[irq];
+       return irl2irq[irq - 16];
 }
 
 static void __init highlander_init_irq(void)
index e34f81e..d0a54a9 100644 (file)
@@ -117,10 +117,10 @@ static unsigned char irl2irq[R2D_NR_IRL];
 
 int rts7751r2d_irq_demux(int irq)
 {
-       if (irq >= R2D_NR_IRL || irq < 0 || !irl2irq[irq])
+       if (irq >= R2D_NR_IRL + 16 || irq < 16 || !irl2irq[irq - 16])
                return irq;
 
-       return irl2irq[irq];
+       return irl2irq[irq - 16];
 }
 
 /*
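
The three SH demux hunks above (and the OFFCHIP_IRQ_BASE change further below) all account for the same shift: legacy IRL sources now live 16 Linux irq numbers above the hardware number, so lookups must translate first. A standalone sketch of the remapping they implement (helper name hypothetical):

    #define LEGACY_IRQ_OFFSET 16

    static int irl_demux_example(int irq, unsigned char *irl2irq, int nr_irl)
    {
            int hw = irq - LEGACY_IRQ_OFFSET;

            if (hw < 0 || hw >= nr_irl || !irl2irq[hw])
                    return irq;     /* not an IRL source: pass through */
            return irl2irq[hw];
    }
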
index efde2ed..9659a0b 100644 (file)
@@ -29,9 +29,9 @@ endchoice
 config HD64461_IRQ
        int "HD64461 IRQ"
        depends on HD64461
-       default "36"
+       default "52"
        help
-         The default setting of the HD64461 IRQ is 36.
+         The default setting of the HD64461 IRQ is 52.
 
          Do not change this unless you know what you are doing.
 
index 623012d..67716a4 100644 (file)
@@ -61,7 +61,7 @@ CONFIG_USB_STORAGE=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
index 41cb588..cd24cf0 100644 (file)
@@ -105,7 +105,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
index a080c5d..cf59b98 100644 (file)
@@ -168,7 +168,7 @@ CONFIG_EXT3_FS=y
 CONFIG_EXT4_FS=y
 CONFIG_XFS_FS=y
 CONFIG_BTRFS_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
 CONFIG_FSCACHE=m
index f661ef8..48f38ec 100644 (file)
@@ -60,7 +60,7 @@ CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
index ae72674..57923c3 100644 (file)
@@ -63,7 +63,7 @@ CONFIG_MMC=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_MSDOS_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_PROC_KCORE=y
index afb24cb..d2c485f 100644 (file)
 #define        HD64461_NIMR            HD64461_IO_OFFSET(0x5002)
 
 #define        HD64461_IRQBASE         OFFCHIP_IRQ_BASE
-#define        OFFCHIP_IRQ_BASE        64
+#define        OFFCHIP_IRQ_BASE        (64 + 16)
 #define        HD64461_IRQ_NUM         16
 
 #define        HD64461_IRQ_UART        (HD64461_IRQBASE+5)
index 97377e8..e90d585 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index 7c489e7..5010164 100644 (file)
@@ -65,7 +65,7 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT2_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_PROC_KCORE=y
 CONFIG_ROMFS_FS=m
index 7a13395..d0af82c 100644 (file)
@@ -15,7 +15,7 @@
 unsigned long __xchg_u32(volatile u32 *m, u32 new);
 void __xchg_called_with_bad_pointer(void);
 
-static inline unsigned long __arch_xchg(unsigned long x, __volatile__ void * ptr, int size)
+static __always_inline unsigned long __arch_xchg(unsigned long x, __volatile__ void * ptr, int size)
 {
        switch (size) {
        case 4:
index 66cd61d..3de2526 100644 (file)
@@ -87,7 +87,7 @@ xchg16(__volatile__ unsigned short *m, unsigned short val)
        return (load32 & mask) >> bit_shift;
 }
 
-static inline unsigned long
+static __always_inline unsigned long
 __arch_xchg(unsigned long x, __volatile__ void * ptr, int size)
 {
        switch (size) {
index 2667f35..0a0d5c3 100644 (file)
@@ -213,7 +213,6 @@ unsigned long __get_wchan(struct task_struct *task);
  */
 #define ARCH_HAS_PREFETCH
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 static inline void prefetch(const void *x)
 {
@@ -239,8 +238,6 @@ static inline void prefetchw(const void *x)
                             : "r" (x));
 }
 
-#define spin_lock_prefetch(x)  prefetchw(x)
-
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
 
 int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap);
index faa835f..4ed06c7 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
index 05ae535..630be79 100644 (file)
@@ -62,7 +62,7 @@ CONFIG_UML_NET_SLIRP=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=y
 CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_PROC_KCORE=y
index 1c75723..8540d33 100644 (file)
@@ -60,7 +60,7 @@ CONFIG_UML_NET_SLIRP=y
 CONFIG_EXT4_FS=y
 CONFIG_REISERFS_FS=y
 CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
 CONFIG_ISO9660_FS=m
 CONFIG_JOLIET=y
 CONFIG_PROC_KCORE=y
index 5026e7b..ff4bda9 100644 (file)
@@ -554,7 +554,7 @@ struct mconsole_output {
 
 static DEFINE_SPINLOCK(client_lock);
 static LIST_HEAD(clients);
-static char console_buf[MCONSOLE_MAX_DATA];
+static char console_buf[MCONSOLE_MAX_DATA] __nonstring;
 
 static void console_write(struct console *console, const char *string,
                          unsigned int len)
@@ -567,7 +567,7 @@ static void console_write(struct console *console, const char *string,
 
        while (len > 0) {
                n = min((size_t) len, ARRAY_SIZE(console_buf));
-               strncpy(console_buf, string, n);
+               memcpy(console_buf, string, n);
                string += n;
                len -= n;
 
index c650e42..c719e1e 100644 (file)
@@ -141,7 +141,7 @@ static int create_tap_fd(char *iface)
        }
        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
-       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+       strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
 
        err = ioctl(fd, TUNSETIFF, (void *) &ifr);
        if (err != 0) {
@@ -171,7 +171,7 @@ static int create_raw_fd(char *iface, int flags, int proto)
                goto raw_fd_cleanup;
        }
        memset(&ifr, 0, sizeof(ifr));
-       strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1);
+       strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name));
        if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) {
                err = -errno;
                goto raw_fd_cleanup;
index 0347a19..981e11d 100644 (file)
@@ -50,7 +50,6 @@ static inline int printk(const char *fmt, ...)
 #endif
 
 extern int in_aton(char *str);
-extern size_t strlcpy(char *, const char *, size_t);
 extern size_t strlcat(char *, const char *, size_t);
 extern size_t strscpy(char *, const char *, size_t);
 
index 918fed7..b1bfed0 100644 (file)
@@ -437,7 +437,7 @@ void __init arch_cpu_finalize_init(void)
        os_check_bugs();
 }
 
-void apply_ibt_endbr(s32 *start, s32 *end)
+void apply_seal_endbr(s32 *start, s32 *end)
 {
 }
 
index 37d60e7..9e71794 100644 (file)
@@ -3,7 +3,6 @@
  * Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  */
 
-#include <linux/minmax.h>
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
@@ -51,7 +50,7 @@ static struct pollfds all_sigio_fds;
 
 static int write_sigio_thread(void *unused)
 {
-       struct pollfds *fds;
+       struct pollfds *fds, tmp;
        struct pollfd *p;
        int i, n, respond_fd;
        char c;
@@ -78,7 +77,9 @@ static int write_sigio_thread(void *unused)
                                               "write_sigio_thread : "
                                               "read on socket failed, "
                                               "err = %d\n", errno);
-                               swap(current_poll, next_poll);
+                               tmp = current_poll;
+                               current_poll = next_poll;
+                               next_poll = tmp;
                                respond_fd = sigio_private[1];
                        }
                        else {
index 7a1abb8..288c422 100644 (file)
@@ -40,7 +40,7 @@ static int __init make_uml_dir(void)
                                __func__);
                        goto err;
                }
-               strlcpy(dir, home, sizeof(dir));
+               strscpy(dir, home, sizeof(dir));
                uml_dir++;
        }
        strlcat(dir, uml_dir, sizeof(dir));
@@ -243,7 +243,7 @@ int __init set_umid(char *name)
        if (strlen(name) > UMID_LEN - 1)
                return -E2BIG;
 
-       strlcpy(umid, name, sizeof(umid));
+       strscpy(umid, name, sizeof(umid));
 
        return 0;
 }
@@ -262,7 +262,7 @@ static int __init make_umid(void)
        make_uml_dir();
 
        if (*umid == '\0') {
-               strlcpy(tmp, uml_dir, sizeof(tmp));
+               strscpy(tmp, uml_dir, sizeof(tmp));
                strlcat(tmp, "XXXXXX", sizeof(tmp));
                fd = mkstemp(tmp);
                if (fd < 0) {
index 7422db4..8d9e4b3 100644 (file)
@@ -1308,44 +1308,8 @@ config X86_REBOOTFIXUPS
          Say N otherwise.
 
 config MICROCODE
-       bool "CPU microcode loading support"
-       default y
+       def_bool y
        depends on CPU_SUP_AMD || CPU_SUP_INTEL
-       help
-         If you say Y here, you will be able to update the microcode on
-         Intel and AMD processors. The Intel support is for the IA32 family,
-         e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The
-         AMD support is for families 0x10 and later. You will obviously need
-         the actual microcode binary data itself which is not shipped with
-         the Linux kernel.
-
-         The preferred method to load microcode from a detached initrd is described
-         in Documentation/arch/x86/microcode.rst. For that you need to enable
-         CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the
-         initrd for microcode blobs.
-
-         In addition, you can build the microcode into the kernel. For that you
-         need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE
-         config option.
-
-config MICROCODE_INTEL
-       bool "Intel microcode loading support"
-       depends on CPU_SUP_INTEL && MICROCODE
-       default MICROCODE
-       help
-         This options enables microcode patch loading support for Intel
-         processors.
-
-         For the current Intel microcode data package go to
-         <https://downloadcenter.intel.com> and search for
-         'Linux Processor Microcode Data File'.
-
-config MICROCODE_AMD
-       bool "AMD microcode loading support"
-       depends on CPU_SUP_AMD && MICROCODE
-       help
-         If you select this option, microcode patch loading support for AMD
-         processors will be enabled.
 
 config MICROCODE_LATE_LOADING
        bool "Late microcode loading (DANGEROUS)"
@@ -2593,6 +2557,13 @@ config CPU_IBRS_ENTRY
          This mitigates both spectre_v2 and retbleed at great cost to
          performance.
 
+config CPU_SRSO
+       bool "Mitigate speculative RAS overflow on AMD"
+       depends on CPU_SUP_AMD && X86_64 && RETHUNK
+       default y
+       help
+         Enable the SRSO mitigation needed on AMD Zen1-4 machines.
+
 config SLS
        bool "Mitigate Straight-Line-Speculation"
        depends on CC_HAS_SLS && X86_64
@@ -2603,6 +2574,25 @@ config SLS
          against straight line speculation. The kernel image might be slightly
          larger.
 
+config GDS_FORCE_MITIGATION
+       bool "Force GDS Mitigation"
+       depends on CPU_SUP_INTEL
+       default n
+       help
+         Gather Data Sampling (GDS) is a hardware vulnerability which allows
+         unprivileged speculative access to data which was previously stored in
+         vector registers.
+
+         This option is equivalent to setting gather_data_sampling=force on the
+         command line. The microcode mitigation is used if present, otherwise
+         AVX is disabled as a mitigation. On affected systems that are
+         missing the microcode, any userspace code that unconditionally
+         uses AVX will break with this option set.
+
+         Setting this option on systems not vulnerable to GDS has no effect.
+
+         If in doubt, say N.
+
 endif
 
 config ARCH_HAS_ADD_PAGES
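The help text above maps the build-time option onto gather_data_sampling=force on the command line. At runtime the chosen mitigation state is reported through sysfs; a hedged C sketch of querying it (the file path follows the usual hw-vuln convention and is an assumption here):

    #include <stdio.h>

    int main(void)
    {
            const char *p =
                "/sys/devices/system/cpu/vulnerabilities/gather_data_sampling";
            char line[128];
            FILE *f = fopen(p, "r");

            if (!f)
                    return 1;                    /* file absent: older kernel? */
            if (fgets(line, sizeof(line), f))
                    fputs(line, stdout);         /* e.g. "Mitigation: Microcode" */
            fclose(f);
            return 0;
    }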
index 40d2ff5..71fc531 100644 (file)
@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
 ifeq ($(CONFIG_LD_IS_BFD),y)
 LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
 endif
+ifeq ($(CONFIG_EFI_STUB),y)
+# ensure that the static EFI stub library will be pulled in, even if it is
+# never referenced explicitly from the startup code
+LDFLAGS_vmlinux += -u efi_pe_entry
+endif
 LDFLAGS_vmlinux += -T
 
 hostprogs      := mkpiggy
index 4ca70bf..f4e22ef 100644 (file)
@@ -26,8 +26,8 @@
  * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
  * is the first thing that runs after switching to long mode. Depending on
  * whether the EFI handover protocol or the compat entry point was used to
- * enter the kernel, it will either branch to the 64-bit EFI handover
- * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF
+ * enter the kernel, it will either branch to the common 64-bit EFI stub
+ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
  * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
  * struct bootparams pointer as the third argument, so the presence of such a
  * pointer is used to disambiguate.
  *  | efi32_pe_entry   |---->|            |            |       +-----------+--+
  *  +------------------+     |            |     +------+----------------+  |
  *                           | startup_32 |---->| startup_64_mixed_mode |  |
- *  +------------------+     |            |     +------+----------------+  V
- *  | efi32_stub_entry |---->|            |            |     +------------------+
- *  +------------------+     +------------+            +---->| efi64_stub_entry |
- *                                                           +-------------+----+
- *                           +------------+     +----------+               |
- *                           | startup_64 |<----| efi_main |<--------------+
- *                           +------------+     +----------+
+ *  +------------------+     |            |     +------+----------------+  |
+ *  | efi32_stub_entry |---->|            |            |                   |
+ *  +------------------+     +------------+            |                   |
+ *                                                     V                   |
+ *                           +------------+     +----------------+         |
+ *                           | startup_64 |<----| efi_stub_entry |<--------+
+ *                           +------------+     +----------------+
  */
 SYM_FUNC_START(startup_64_mixed_mode)
        lea     efi32_boot_args(%rip), %rdx
        mov     0(%rdx), %edi
        mov     4(%rdx), %esi
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
        mov     8(%rdx), %edx           // saved bootparams pointer
        test    %edx, %edx
-       jnz     efi64_stub_entry
+       jnz     efi_stub_entry
+#endif
        /*
         * efi_pe_entry uses MS calling convention, which requires 32 bytes of
         * shadow space on the stack even if all arguments are passed in
@@ -138,6 +140,28 @@ SYM_FUNC_START(__efi64_thunk)
 SYM_FUNC_END(__efi64_thunk)
 
        .code32
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+SYM_FUNC_START(efi32_stub_entry)
+       call    1f
+1:     popl    %ecx
+
+       /* Clear BSS */
+       xorl    %eax, %eax
+       leal    (_bss - 1b)(%ecx), %edi
+       leal    (_ebss - 1b)(%ecx), %ecx
+       subl    %edi, %ecx
+       shrl    $2, %ecx
+       cld
+       rep     stosl
+
+       add     $0x4, %esp              /* Discard return address */
+       popl    %ecx
+       popl    %edx
+       popl    %esi
+       jmp     efi32_entry
+SYM_FUNC_END(efi32_stub_entry)
+#endif
+
 /*
  * EFI service pointer must be in %edi.
  *
@@ -218,7 +242,7 @@ SYM_FUNC_END(efi_enter32)
  * stub may still exit and return to the firmware using the Exit() EFI boot
  * service.]
  */
-SYM_FUNC_START(efi32_entry)
+SYM_FUNC_START_LOCAL(efi32_entry)
        call    1f
 1:     pop     %ebx
 
@@ -245,10 +269,6 @@ SYM_FUNC_START(efi32_entry)
        jmp     startup_32
 SYM_FUNC_END(efi32_entry)
 
-#define ST32_boottime          60 // offsetof(efi_system_table_32_t, boottime)
-#define BS32_handle_protocol   88 // offsetof(efi_boot_services_32_t, handle_protocol)
-#define LI32_image_base                32 // offsetof(efi_loaded_image_32_t, image_base)
-
 /*
  * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
  *                            efi_system_table_32_t *sys_table)
@@ -256,8 +276,6 @@ SYM_FUNC_END(efi32_entry)
 SYM_FUNC_START(efi32_pe_entry)
        pushl   %ebp
        movl    %esp, %ebp
-       pushl   %eax                            // dummy push to allocate loaded_image
-
        pushl   %ebx                            // save callee-save registers
        pushl   %edi
 
@@ -266,48 +284,8 @@ SYM_FUNC_START(efi32_pe_entry)
        movl    $0x80000003, %eax               // EFI_UNSUPPORTED
        jnz     2f
 
-       call    1f
-1:     pop     %ebx
-
-       /* Get the loaded image protocol pointer from the image handle */
-       leal    -4(%ebp), %eax
-       pushl   %eax                            // &loaded_image
-       leal    (loaded_image_proto - 1b)(%ebx), %eax
-       pushl   %eax                            // pass the GUID address
-       pushl   8(%ebp)                         // pass the image handle
-
-       /*
-        * Note the alignment of the stack frame.
-        *   sys_table
-        *   handle             <-- 16-byte aligned on entry by ABI
-        *   return address
-        *   frame pointer
-        *   loaded_image       <-- local variable
-        *   saved %ebx         <-- 16-byte aligned here
-        *   saved %edi
-        *   &loaded_image
-        *   &loaded_image_proto
-        *   handle             <-- 16-byte aligned for call to handle_protocol
-        */
-
-       movl    12(%ebp), %eax                  // sys_table
-       movl    ST32_boottime(%eax), %eax       // sys_table->boottime
-       call    *BS32_handle_protocol(%eax)     // sys_table->boottime->handle_protocol
-       addl    $12, %esp                       // restore argument space
-       testl   %eax, %eax
-       jnz     2f
-
        movl    8(%ebp), %ecx                   // image_handle
        movl    12(%ebp), %edx                  // sys_table
-       movl    -4(%ebp), %esi                  // loaded_image
-       movl    LI32_image_base(%esi), %esi     // loaded_image->image_base
-       leal    (startup_32 - 1b)(%ebx), %ebp   // runtime address of startup_32
-       /*
-        * We need to set the image_offset variable here since startup_32() will
-        * use it before we get to the 64-bit efi_pe_entry() in C code.
-        */
-       subl    %esi, %ebp                      // calculate image_offset
-       movl    %ebp, (image_offset - 1b)(%ebx) // save image_offset
        xorl    %esi, %esi
        jmp     efi32_entry                     // pass %ecx, %edx, %esi
                                                // no other registers remain live
@@ -318,14 +296,13 @@ SYM_FUNC_START(efi32_pe_entry)
        RET
 SYM_FUNC_END(efi32_pe_entry)
 
-       .section ".rodata"
-       /* EFI loaded image protocol GUID */
-       .balign 4
-SYM_DATA_START_LOCAL(loaded_image_proto)
-       .long   0x5b1b31a1
-       .word   0x9562, 0x11d2
-       .byte   0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
-SYM_DATA_END(loaded_image_proto)
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+       .org    efi32_stub_entry + 0x200
+       .code64
+SYM_FUNC_START_NOALIGN(efi64_stub_entry)
+       jmp     efi_handover_entry
+SYM_FUNC_END(efi64_stub_entry)
+#endif
 
        .data
        .balign 8
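The efi32_stub_entry added above computes its own load address (call 1f; popl %ecx) and wipes [_bss, _ebss) with rep stosl before any code can rely on zeroed statics. The same wipe as a C sketch (the symbols stand in for the linker-script ones; the assembly works in 4-byte stores, which assumes the BSS bounds are 4-byte aligned):

    extern char _bss[], _ebss[];          /* linker-script symbols (stand-ins) */

    static void clear_bss(void)
    {
            for (char *p = _bss; p < _ebss; p++)
                    *p = 0;
    }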
index 5313c5c..19a8251 100644 (file)
@@ -7,7 +7,7 @@
 #include "misc.h"
 #include "error.h"
 
-void warn(char *m)
+void warn(const char *m)
 {
        error_putstr("\n\n");
        error_putstr(m);
index 86fe33b..31f9e08 100644 (file)
@@ -4,7 +4,7 @@
 
 #include <linux/compiler.h>
 
-void warn(char *m);
+void warn(const char *m);
 void error(char *m) __noreturn;
 void panic(const char *fmt, ...) __noreturn __cold;
 
index 987ae72..1cfe980 100644 (file)
@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)
 
 #ifdef CONFIG_RELOCATABLE
        leal    startup_32@GOTOFF(%edx), %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32() will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry() will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *     image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-       subl    image_offset@GOTOFF(%edx), %ebx
-#endif
-
        movl    BP_kernel_alignment(%esi), %eax
        decl    %eax
        addl    %eax, %ebx
@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32)
        jmp     *%eax
 SYM_FUNC_END(startup_32)
 
-#ifdef CONFIG_EFI_STUB
-SYM_FUNC_START(efi32_stub_entry)
-       add     $0x4, %esp
-       movl    8(%esp), %esi   /* save boot_params pointer */
-       call    efi_main
-       /* efi_main returns the possibly relocated address of startup_32 */
-       jmp     *%eax
-SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
-#endif
-
        .text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 
@@ -179,13 +155,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
  */
        /* push arguments for extract_kernel: */
 
-       pushl   output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
        pushl   %ebp                    /* output address */
-       pushl   input_len@GOTOFF(%ebx)  /* input_len */
-       leal    input_data@GOTOFF(%ebx), %eax
-       pushl   %eax                    /* input_data */
-       leal    boot_heap@GOTOFF(%ebx), %eax
-       pushl   %eax                    /* heap area */
        pushl   %esi                    /* real mode pointer */
        call    extract_kernel          /* returns kernel entry point in %eax */
        addl    $24, %esp
@@ -213,8 +183,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
  */
        .bss
        .balign 4
-boot_heap:
-       .fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
        .fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
index 03c4328..bf4a10a 100644 (file)
@@ -146,19 +146,6 @@ SYM_FUNC_START(startup_32)
 
 #ifdef CONFIG_RELOCATABLE
        movl    %ebp, %ebx
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *     image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-       subl    rva(image_offset)(%ebp), %ebx
-#endif
-
        movl    BP_kernel_alignment(%esi), %eax
        decl    %eax
        addl    %eax, %ebx
@@ -294,17 +281,6 @@ SYM_FUNC_START(startup_32)
        lret
 SYM_FUNC_END(startup_32)
 
-#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL)
-       .org 0x190
-SYM_FUNC_START(efi32_stub_entry)
-       add     $0x4, %esp              /* Discard return address */
-       popl    %ecx
-       popl    %edx
-       popl    %esi
-       jmp     efi32_entry
-SYM_FUNC_END(efi32_stub_entry)
-#endif
-
        .code64
        .org 0x200
 SYM_CODE_START(startup_64)
@@ -346,20 +322,6 @@ SYM_CODE_START(startup_64)
        /* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
        leaq    startup_32(%rip) /* - $startup_32 */, %rbp
-
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32 will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *     image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-       movl    image_offset(%rip), %eax
-       subq    %rax, %rbp
-#endif
-
        movl    BP_kernel_alignment(%rsi), %eax
        decl    %eax
        addq    %rax, %rbp
@@ -398,10 +360,6 @@ SYM_CODE_START(startup_64)
         * For the trampoline, we need the top page table to reside in lower
         * memory as we don't have a way to load 64-bit values into CR3 in
         * 32-bit mode.
-        *
-        * We go though the trampoline even if we don't have to: if we're
-        * already in a desired paging mode. This way the trampoline code gets
-        * tested on every boot.
         */
 
        /* Make sure we have GDT with 32-bit code segment */
@@ -416,10 +374,14 @@ SYM_CODE_START(startup_64)
        lretq
 
 .Lon_kernel_cs:
+       /*
+        * RSI holds a pointer to a boot_params structure provided by the
+        * loader, and this needs to be preserved across C function calls. So
+        * move it into a callee saved register.
+        */
+       movq    %rsi, %r15
 
-       pushq   %rsi
        call    load_stage1_idt
-       popq    %rsi
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
        /*
@@ -430,63 +392,24 @@ SYM_CODE_START(startup_64)
         * CPUID instructions being issued, so go ahead and do that now via
         * sev_enable(), which will also handle the rest of the SEV-related
         * detection/setup to ensure that has been done in advance of any dependent
-        * code.
+        * code. Pass the boot_params pointer as the first argument.
         */
-       pushq   %rsi
-       movq    %rsi, %rdi              /* real mode address */
+       movq    %r15, %rdi
        call    sev_enable
-       popq    %rsi
 #endif
 
        /*
-        * paging_prepare() sets up the trampoline and checks if we need to
-        * enable 5-level paging.
+        * configure_5level_paging() updates the number of paging levels using
+        * a trampoline in 32-bit addressable memory if the current number does
+        * not match the desired number.
         *
-        * paging_prepare() returns a two-quadword structure which lands
-        * into RDX:RAX:
-        *   - Address of the trampoline is returned in RAX.
-        *   - Non zero RDX means trampoline needs to enable 5-level
-        *     paging.
-        *
-        * RSI holds real mode data and needs to be preserved across
-        * this function call.
-        */
-       pushq   %rsi
-       movq    %rsi, %rdi              /* real mode address */
-       call    paging_prepare
-       popq    %rsi
-
-       /* Save the trampoline address in RCX */
-       movq    %rax, %rcx
-
-       /*
-        * Load the address of trampoline_return() into RDI.
-        * It will be used by the trampoline to return to the main code.
+        * Pass the boot_params pointer as the first argument. The second
+        * argument is the relocated address of the page table to use instead
+        * of the page table in trampoline memory (if required).
         */
-       leaq    trampoline_return(%rip), %rdi
-
-       /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
-       pushq   $__KERNEL32_CS
-       leaq    TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
-       pushq   %rax
-       lretq
-trampoline_return:
-       /* Restore the stack, the 32-bit trampoline uses its own stack */
-       leaq    rva(boot_stack_end)(%rbx), %rsp
-
-       /*
-        * cleanup_trampoline() would restore trampoline memory.
-        *
-        * RDI is address of the page table to use instead of page table
-        * in trampoline memory (if required).
-        *
-        * RSI holds real mode data and needs to be preserved across
-        * this function call.
-        */
-       pushq   %rsi
-       leaq    rva(top_pgtable)(%rbx), %rdi
-       call    cleanup_trampoline
-       popq    %rsi
+       movq    %r15, %rdi
+       leaq    rva(top_pgtable)(%rbx), %rsi
+       call    configure_5level_paging
 
        /* Zero EFLAGS */
        pushq   $0
@@ -496,7 +419,6 @@ trampoline_return:
  * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-       pushq   %rsi
        leaq    (_bss-8)(%rip), %rsi
        leaq    rva(_bss-8)(%rbx), %rdi
        movl    $(_bss - startup_32), %ecx
@@ -504,7 +426,6 @@ trampoline_return:
        std
        rep     movsq
        cld
-       popq    %rsi
 
        /*
         * The GDT may get overwritten either during the copy we just did or
@@ -523,21 +444,6 @@ trampoline_return:
        jmp     *%rax
 SYM_CODE_END(startup_64)
 
-#ifdef CONFIG_EFI_STUB
-#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
-       .org 0x390
-#endif
-SYM_FUNC_START(efi64_stub_entry)
-       and     $~0xf, %rsp                     /* realign the stack */
-       movq    %rdx, %rbx                      /* save boot_params pointer */
-       call    efi_main
-       movq    %rbx,%rsi
-       leaq    rva(startup_64)(%rax), %rax
-       jmp     *%rax
-SYM_FUNC_END(efi64_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry)
-#endif
-
        .text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
 
@@ -551,128 +457,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
        shrq    $3, %rcx
        rep     stosq
 
-       pushq   %rsi
        call    load_stage2_idt
 
        /* Pass boot_params to initialize_identity_maps() */
-       movq    (%rsp), %rdi
+       movq    %r15, %rdi
        call    initialize_identity_maps
-       popq    %rsi
 
 /*
  * Do the extraction, and jump to the new kernel..
  */
-       pushq   %rsi                    /* Save the real mode argument */
-       movq    %rsi, %rdi              /* real mode address */
-       leaq    boot_heap(%rip), %rsi   /* malloc area for uncompression */
-       leaq    input_data(%rip), %rdx  /* input_data */
-       movl    input_len(%rip), %ecx   /* input_len */
-       movq    %rbp, %r8               /* output target address */
-       movl    output_len(%rip), %r9d  /* decompressed length, end of relocs */
+       /* pass struct boot_params pointer and output target address */
+       movq    %r15, %rdi
+       movq    %rbp, %rsi
        call    extract_kernel          /* returns kernel entry point in %rax */
-       popq    %rsi
 
 /*
  * Jump to the decompressed kernel.
  */
+       movq    %r15, %rsi
        jmp     *%rax
 SYM_FUNC_END(.Lrelocated)
 
-       .code32
 /*
- * This is the 32-bit trampoline that will be copied over to low memory.
+ * This is the 32-bit trampoline that will be copied over to low memory. It
+ * will be called using the ordinary 64-bit calling convention from code
+ * running in 64-bit mode.
  *
- * RDI contains the return address (might be above 4G).
- * ECX contains the base address of the trampoline memory.
- * Non zero RDX means trampoline needs to enable 5-level paging.
+ * Return address is at the top of the stack (might be above 4G).
+ * The first argument (EDI) contains the address of the temporary PGD level
+ * page table in 32-bit addressable memory which will be programmed into
+ * register CR3.
  */
+       .section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
-       /* Set up data and stack segments */
-       movl    $__KERNEL_DS, %eax
-       movl    %eax, %ds
-       movl    %eax, %ss
+       /*
+        * Preserve callee save 64-bit registers on the stack: this is
+        * necessary because the architecture does not guarantee that GPRs will
+        * retain their full 64-bit values across a 32-bit mode switch.
+        */
+       pushq   %r15
+       pushq   %r14
+       pushq   %r13
+       pushq   %r12
+       pushq   %rbp
+       pushq   %rbx
+
+       /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+       movq    %rsp, %rbx
+       shrq    $32, %rbx
+
+       /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
+       pushq   $__KERNEL32_CS
+       leaq    0f(%rip), %rax
+       pushq   %rax
+       lretq
 
-       /* Set up new stack */
-       leal    TRAMPOLINE_32BIT_STACK_END(%ecx), %esp
+       /*
+        * The 32-bit code below will do a far jump back to long mode and end
+        * up here after reconfiguring the number of paging levels. First, the
+        * stack pointer needs to be restored to its full 64-bit value before
+        * the callee save register contents can be popped from the stack.
+        */
+.Lret:
+       shlq    $32, %rbx
+       orq     %rbx, %rsp
+
+       /* Restore the preserved 64-bit registers */
+       popq    %rbx
+       popq    %rbp
+       popq    %r12
+       popq    %r13
+       popq    %r14
+       popq    %r15
+       retq
 
+       .code32
+0:
        /* Disable paging */
        movl    %cr0, %eax
        btrl    $X86_CR0_PG_BIT, %eax
        movl    %eax, %cr0
 
-       /* Check what paging mode we want to be in after the trampoline */
-       testl   %edx, %edx
-       jz      1f
-
-       /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */
-       movl    %cr4, %eax
-       testl   $X86_CR4_LA57, %eax
-       jnz     3f
-       jmp     2f
-1:
-       /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */
-       movl    %cr4, %eax
-       testl   $X86_CR4_LA57, %eax
-       jz      3f
-2:
        /* Point CR3 to the trampoline's new top level page table */
-       leal    TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax
-       movl    %eax, %cr3
-3:
+       movl    %edi, %cr3
+
        /* Set EFER.LME=1 as a precaution in case hypervisor pulls the rug */
-       pushl   %ecx
-       pushl   %edx
        movl    $MSR_EFER, %ecx
        rdmsr
        btsl    $_EFER_LME, %eax
        /* Avoid writing EFER if no change was made (for TDX guest) */
        jc      1f
        wrmsr
-1:     popl    %edx
-       popl    %ecx
-
-#ifdef CONFIG_X86_MCE
-       /*
-        * Preserve CR4.MCE if the kernel will enable #MC support.
-        * Clearing MCE may fault in some environments (that also force #MC
-        * support). Any machine check that occurs before #MC support is fully
-        * configured will crash the system regardless of the CR4.MCE value set
-        * here.
-        */
-       movl    %cr4, %eax
-       andl    $X86_CR4_MCE, %eax
-#else
-       movl    $0, %eax
-#endif
-
-       /* Enable PAE and LA57 (if required) paging modes */
-       orl     $X86_CR4_PAE, %eax
-       testl   %edx, %edx
-       jz      1f
-       orl     $X86_CR4_LA57, %eax
 1:
+       /* Toggle CR4.LA57 */
+       movl    %cr4, %eax
+       btcl    $X86_CR4_LA57_BIT, %eax
        movl    %eax, %cr4
 
-       /* Calculate address of paging_enabled() once we are executing in the trampoline */
-       leal    .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax
-
-       /* Prepare the stack for far return to Long Mode */
-       pushl   $__KERNEL_CS
-       pushl   %eax
-
        /* Enable paging again. */
        movl    %cr0, %eax
        btsl    $X86_CR0_PG_BIT, %eax
        movl    %eax, %cr0
 
-       lret
+       /*
+        * Return to the 64-bit calling code using LJMP rather than LRET, to
+        * avoid the need for a 32-bit addressable stack. The destination
+        * address will be adjusted after the template code is copied into a
+        * 32-bit addressable buffer.
+        */
+.Ljmp: ljmpl   $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
 SYM_CODE_END(trampoline_32bit_src)
 
-       .code64
-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
-       /* Return from the trampoline */
-       jmp     *%rdi
-SYM_FUNC_END(.Lpaging_enabled)
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word  .Ljmp + 1 - trampoline_32bit_src)
 
        /*
          * The trampoline code has a size limit.
@@ -681,7 +581,7 @@ SYM_FUNC_END(.Lpaging_enabled)
         */
        .org    trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
-       .code32
+       .text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
        /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
@@ -726,8 +626,6 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end)
  */
        .bss
        .balign 4
-SYM_DATA_LOCAL(boot_heap,      .fill BOOT_HEAP_SIZE, 1, 0)
-
 SYM_DATA_START_LOCAL(boot_stack)
        .fill BOOT_STACK_SIZE, 1, 0
        .balign 16
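Within trampoline_32bit_src above, the top half of RSP is parked in %rbx (shrq $32, %rbx) before the far return drops to compatibility mode, because 32-bit execution preserves only the low 32 bits of the GPRs; .Lret then rebuilds the full pointer with shlq/orq. The trick in C, purely as an illustration:

    #include <stdint.h>

    static uint32_t hi32, lo32;           /* what survives 32-bit mode */

    static void park(uint64_t rsp)
    {
            hi32 = (uint32_t)(rsp >> 32);
            lo32 = (uint32_t)rsp;
    }

    static uint64_t restore(void)
    {
            return ((uint64_t)hi32 << 32) | lo32;
    }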
index 6debb81..3cdf94b 100644 (file)
@@ -63,7 +63,14 @@ void load_stage2_idt(void)
        set_idt_entry(X86_TRAP_PF, boot_page_fault);
 
 #ifdef CONFIG_AMD_MEM_ENCRYPT
-       set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+       /*
+        * Clear the second stage #VC handler in case guest types
+        * needing #VC have not been detected.
+        */
+       if (sev_status & BIT(1))
+               set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+       else
+               set_idt_entry(X86_TRAP_VC, NULL);
 #endif
 
        load_boot_idt(&boot_idt_desc);
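The conditional above keys the second-stage #VC handler off bit 1 of sev_status, and leaves the vector empty otherwise so an unexpected #VC faults loudly instead of running a handler for an undetected guest type. Reading that bit in miniature (taking BIT(1) as the SEV-ES enable flag is an assumption here):

    #include <stdbool.h>
    #include <stdint.h>

    #define SEV_ES_ENABLED (UINT64_C(1) << 1)   /* assumed meaning of BIT(1) */

    static bool want_vc_handler(uint64_t sev_status)
    {
            return (sev_status & SEV_ES_ENABLED) != 0;
    }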
index 94b7abc..f711f2a 100644 (file)
@@ -330,6 +330,33 @@ static size_t parse_elf(void *output)
        return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
 }
 
+const unsigned long kernel_total_size = VO__end - VO__text;
+
+static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
+
+extern unsigned char input_data[];
+extern unsigned int input_len, output_len;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+                               void (*error)(char *x))
+{
+       unsigned long entry;
+
+       if (!free_mem_ptr) {
+               free_mem_ptr     = (unsigned long)boot_heap;
+               free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
+       }
+
+       if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
+                        NULL, error) < 0)
+               return ULONG_MAX;
+
+       entry = parse_elf(outbuf);
+       handle_relocations(outbuf, output_len, virt_addr);
+
+       return entry;
+}
+
 /*
  * The compressed kernel image (ZO), has been moved so that its position
  * is against the end of the buffer used to hold the uncompressed kernel
@@ -347,14 +374,10 @@ static size_t parse_elf(void *output)
  *             |-------uncompressed kernel image---------|
  *
  */
-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
-                                 unsigned char *input_data,
-                                 unsigned long input_len,
-                                 unsigned char *output,
-                                 unsigned long output_len)
+asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
 {
-       const unsigned long kernel_total_size = VO__end - VO__text;
        unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+       memptr heap = (memptr)boot_heap;
        unsigned long needed_size;
        size_t entry_offset;
 
@@ -412,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
         * entries. This ensures the full mapped area is usable RAM
         * and doesn't include any reserved areas.
         */
-       needed_size = max(output_len, kernel_total_size);
+       needed_size = max_t(unsigned long, output_len, kernel_total_size);
 #ifdef CONFIG_X86_64
        needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
 #endif
@@ -443,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifdef CONFIG_X86_64
        if (heap > 0x3fffffffffffUL)
                error("Destination address too large");
-       if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+       if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
                error("Destination virtual address is beyond the kernel mapping area");
 #else
        if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
@@ -461,10 +484,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
                accept_memory(__pa(output), __pa(output) + needed_size);
        }
 
-       __decompress(input_data, input_len, NULL, NULL, output, output_len,
-                       NULL, error);
-       entry_offset = parse_elf(output);
-       handle_relocations(output, output_len, virt_addr);
+       entry_offset = decompress_kernel(output, virt_addr, error);
 
        debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
        debug_puthex(entry_offset);
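output_len is now the extern unsigned int declared earlier in this file's hunk, while kernel_total_size is unsigned long, so the type-checking max() macro had to become max_t(unsigned long, ...). What max_t() boils down to here, in plain C:

    /* Promote both operands to one type, then compare. */
    static unsigned long max_ul(unsigned long a, unsigned long b)
    {
            return a > b ? a : b;
    }

    /* needed_size = max_ul(output_len, kernel_total_size); */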
index 964fe90..cc70d3f 100644 (file)
@@ -179,9 +179,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
 #endif
 
 /* ident_map_64.c */
-#ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
-#endif
 extern void kernel_add_identity_map(unsigned long start, unsigned long end);
 
 /* Used by PAGE_KERN* macros: */
index cc9b252..6d595ab 100644 (file)
@@ -3,18 +3,16 @@
 
 #define TRAMPOLINE_32BIT_SIZE          (2 * PAGE_SIZE)
 
-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET        0
-
 #define TRAMPOLINE_32BIT_CODE_OFFSET   PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE     0x80
-
-#define TRAMPOLINE_32BIT_STACK_END     TRAMPOLINE_32BIT_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE     0xA0
 
 #ifndef __ASSEMBLER__
 
 extern unsigned long *trampoline_32bit;
 
-extern void trampoline_32bit_src(void *return_ptr);
+extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
+
+extern const u16 trampoline_ljmp_imm_offset;
 
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
index 2ac12ff..7939eb6 100644 (file)
@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
 unsigned int __section(".data") ptrs_per_p4d = 1;
 #endif
 
-struct paging_config {
-       unsigned long trampoline_start;
-       unsigned long l5_required;
-};
-
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
@@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
  * purposes.
  *
  * Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
+ * configure_5level_paging() and extract_kernel().
  */
 unsigned long *trampoline_32bit __section(".data");
 
@@ -106,12 +101,13 @@ static unsigned long find_trampoline_placement(void)
        return bios_start - TRAMPOLINE_32BIT_SIZE;
 }
 
-struct paging_config paging_prepare(void *rmode)
+asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
 {
-       struct paging_config paging_config = {};
+       void (*toggle_la57)(void *cr3);
+       bool l5_required = false;
 
        /* Initialize boot_params. Required for cmdline_find_option_bool(). */
-       boot_params = rmode;
+       boot_params = bp;
 
        /*
         * Check if LA57 is desired and supported.
@@ -129,12 +125,22 @@ struct paging_config paging_prepare(void *rmode)
                        !cmdline_find_option_bool("no5lvl") &&
                        native_cpuid_eax(0) >= 7 &&
                        (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
-               paging_config.l5_required = 1;
+               l5_required = true;
+
+               /* Initialize variables for 5-level paging */
+               __pgtable_l5_enabled = 1;
+               pgdir_shift = 48;
+               ptrs_per_p4d = 512;
        }
 
-       paging_config.trampoline_start = find_trampoline_placement();
+       /*
+        * The trampoline will not be used if the paging mode is already set to
+        * the desired one.
+        */
+       if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+               return;
 
-       trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+       trampoline_32bit = (unsigned long *)find_trampoline_placement();
 
        /* Preserve trampoline memory */
        memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
@@ -143,32 +149,32 @@ struct paging_config paging_prepare(void *rmode)
        memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
        /* Copy trampoline code in place */
-       memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+       toggle_la57 = memcpy(trampoline_32bit +
+                       TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
                        &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
        /*
+        * Avoid the need for a stack in the 32-bit trampoline code, by using
+        * LJMP rather than LRET to return back to long mode. LJMP takes an
+        * immediate absolute address, which needs to be adjusted based on the
+        * placement of the trampoline.
+        */
+       *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
+                                               (unsigned long)toggle_la57;
+
+       /*
         * The code below prepares page table in trampoline memory.
         *
         * The new page table will be used by trampoline code for switching
         * from 4- to 5-level paging or vice versa.
-        *
-        * If switching is not required, the page table is unused: trampoline
-        * code wouldn't touch CR3.
-        */
-
-       /*
-        * We are not going to use the page table in trampoline memory if we
-        * are already in the desired paging mode.
         */
-       if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
-               goto out;
 
-       if (paging_config.l5_required) {
+       if (l5_required) {
                /*
                 * For 4- to 5-level paging transition, set up current CR3 as
                 * the first and the only entry in a new top-level page table.
                 */
-               trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+               *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
        } else {
                unsigned long src;
 
@@ -181,38 +187,17 @@ struct paging_config paging_prepare(void *rmode)
                 * may be above 4G.
                 */
                src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
-               memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
-                      (void *)src, PAGE_SIZE);
+               memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
        }
 
-out:
-       return paging_config;
-}
-
-void cleanup_trampoline(void *pgtable)
-{
-       void *trampoline_pgtable;
-
-       trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+       toggle_la57(trampoline_32bit);
 
        /*
-        * Move the top level page table out of trampoline memory,
-        * if it's there.
+        * Move the top level page table out of trampoline memory.
         */
-       if ((void *)__native_read_cr3() == trampoline_pgtable) {
-               memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
-               native_write_cr3((unsigned long)pgtable);
-       }
+       memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
+       native_write_cr3((unsigned long)pgtable);
 
        /* Restore trampoline memory */
        memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-
-       /* Initialize variables for 5-level paging */
-#ifdef CONFIG_X86_5LEVEL
-       if (__read_cr4() & X86_CR4_LA57) {
-               __pgtable_l5_enabled = 1;
-               pgdir_shift = 48;
-               ptrs_per_p4d = 512;
-       }
-#endif
 }
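The += on the stored immediate above is the whole relocation story: the template ends in ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src), i.e. an offset, and adding the copy's runtime address turns that offset into the absolute target LJMP needs. A standalone sketch with hypothetical names, where imm_offset points one byte past the LJMP opcode at its 32-bit immediate:

    #include <stdint.h>
    #include <string.h>

    /* Fix up the 32-bit absolute immediate inside a copied code template. */
    static void patch_ljmp_target(uint8_t *copy, uint16_t imm_offset)
    {
            uint32_t imm;

            memcpy(&imm, copy + imm_offset, sizeof(imm));  /* offset form */
            imm += (uint32_t)(uintptr_t)copy;              /* make absolute */
            memcpy(copy + imm_offset, &imm, sizeof(imm));
    }

memcpy() sidesteps the unaligned access a direct u32 store would make; that is harmless on x86, but kept portable in the sketch.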
index 09dc8c1..dc8c876 100644 (file)
@@ -365,22 +365,27 @@ static void enforce_vmpl0(void)
  * by the guest kernel. As and when a new feature is implemented in the
  * guest kernel, a corresponding bit should be added to the mask.
  */
-#define SNP_FEATURES_PRESENT (0)
+#define SNP_FEATURES_PRESENT   MSR_AMD64_SNP_DEBUG_SWAP
+
+u64 snp_get_unsupported_features(u64 status)
+{
+       if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
+               return 0;
+
+       return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+}
 
 void snp_check_features(void)
 {
        u64 unsupported;
 
-       if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
-               return;
-
        /*
         * Terminate the boot if hypervisor has enabled any feature lacking
         * guest side implementation. Pass on the unsupported features mask through
         * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
         * as part of the guest boot failure.
         */
-       unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+       unsupported = snp_get_unsupported_features(sev_status);
        if (unsupported) {
                if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
                        sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
@@ -390,32 +395,22 @@ void snp_check_features(void)
        }
 }
 
-void sev_enable(struct boot_params *bp)
+/*
+ * sev_check_cpu_support - Check for SEV support in the CPU capabilities
+ *
+ * Returns < 0 if SEV is not supported, otherwise the position of the
+ * encryption bit in the page table descriptors.
+ */
+static int sev_check_cpu_support(void)
 {
        unsigned int eax, ebx, ecx, edx;
-       struct msr m;
-       bool snp;
-
-       /*
-        * bp->cc_blob_address should only be set by boot/compressed kernel.
-        * Initialize it to 0 to ensure that uninitialized values from
-        * buggy bootloaders aren't propagated.
-        */
-       if (bp)
-               bp->cc_blob_address = 0;
-
-       /*
-        * Setup/preliminary detection of SNP. This will be sanity-checked
-        * against CPUID/MSR values later.
-        */
-       snp = snp_init(bp);
 
        /* Check for the SME/SEV support leaf */
        eax = 0x80000000;
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
        if (eax < 0x8000001f)
-               return;
+               return -ENODEV;
 
        /*
         * Check for the SME/SEV feature:
@@ -429,7 +424,48 @@ void sev_enable(struct boot_params *bp)
        ecx = 0;
        native_cpuid(&eax, &ebx, &ecx, &edx);
        /* Check whether SEV is supported */
-       if (!(eax & BIT(1))) {
+       if (!(eax & BIT(1)))
+               return -ENODEV;
+
+       return ebx & 0x3f;
+}
+
+void sev_enable(struct boot_params *bp)
+{
+       struct msr m;
+       int bitpos;
+       bool snp;
+
+       /*
+        * bp->cc_blob_address should only be set by boot/compressed kernel.
+        * Initialize it to 0 to ensure that uninitialized values from
+        * buggy bootloaders aren't propagated.
+        */
+       if (bp)
+               bp->cc_blob_address = 0;
+
+       /*
+        * Do an initial SEV capability check before snp_init() which
+        * loads the CPUID page and the same checks afterwards are done
+        * without the hypervisor and are trustworthy.
+        *
+        * If the HV fakes SEV support, the guest will crash'n'burn
+        * which is good enough.
+        */
+
+       if (sev_check_cpu_support() < 0)
+               return;
+
+       /*
+        * Setup/preliminary detection of SNP. This will be sanity-checked
+        * against CPUID/MSR values later.
+        */
+       snp = snp_init(bp);
+
+       /* Now repeat the checks with the SNP CPUID table. */
+
+       bitpos = sev_check_cpu_support();
+       if (bitpos < 0) {
                if (snp)
                        error("SEV-SNP support indicated by CC blob, but not CPUID.");
                return;
@@ -461,7 +497,24 @@ void sev_enable(struct boot_params *bp)
        if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
                error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
 
-       sme_me_mask = BIT_ULL(ebx & 0x3f);
+       sme_me_mask = BIT_ULL(bitpos);
+}
+
+/*
+ * sev_get_status - Retrieve the SEV status mask
+ *
+ * Returns 0 if the CPU is not SEV capable, otherwise the value of the
+ * AMD64_SEV MSR.
+ */
+u64 sev_get_status(void)
+{
+       struct msr m;
+
+       if (sev_check_cpu_support() < 0)
+               return 0;
+
+       boot_rdmsr(MSR_AMD64_SEV, &m);
+       return m.q;
 }
 
 /* Search for Confidential Computing blob in the EFI config table. */
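sev_check_cpu_support() above is deliberately cheap enough to run twice: once before snp_init() against whatever the hypervisor reports, and once afterwards against the SNP CPUID table. A userspace sketch of the same probe using the compiler builtin (leaf and bit numbers taken from the hunk):

    #include <cpuid.h>

    /* Returns the C-bit position from CPUID 0x8000001f:EBX[5:0], or -1. */
    static int sev_cbit_pos(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x80000000, &eax, &ebx, &ecx, &edx) ||
                eax < 0x8000001f)
                    return -1;
            if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx) ||
                !(eax & (1u << 1)))              /* SEV feature bit */
                    return -1;
            return (int)(ebx & 0x3f);
    }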
index 3cf3491..1b411bb 100644 (file)
@@ -33,7 +33,6 @@ CONFIG_HYPERVISOR_GUEST=y
 CONFIG_PARAVIRT=y
 CONFIG_NR_CPUS=8
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
@@ -245,7 +244,7 @@ CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
index 2775923..409e918 100644 (file)
@@ -31,7 +31,6 @@ CONFIG_SMP=y
 CONFIG_HYPERVISOR_GUEST=y
 CONFIG_PARAVIRT=y
 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
-CONFIG_MICROCODE_AMD=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
 CONFIG_NUMA=y
@@ -242,7 +241,7 @@ CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
 CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
index 91397f5..6e6af42 100644 (file)
@@ -720,26 +720,6 @@ SYM_CODE_END(__switch_to_asm)
 .popsection
 
 /*
- * The unwinder expects the last frame on the stack to always be at the same
- * offset from the end of the page, which allows it to validate the stack.
- * Calling schedule_tail() directly would break that convention because its an
- * asmlinkage function so its argument has to be pushed on the stack.  This
- * wrapper creates a proper "end of stack" frame header before the call.
- */
-.pushsection .text, "ax"
-SYM_FUNC_START(schedule_tail_wrapper)
-       FRAME_BEGIN
-
-       pushl   %eax
-       call    schedule_tail
-       popl    %eax
-
-       FRAME_END
-       RET
-SYM_FUNC_END(schedule_tail_wrapper)
-.popsection
-
-/*
  * A newly forked process directly context switches into this address.
  *
  * eax: prev task we switched from
@@ -747,29 +727,22 @@ SYM_FUNC_END(schedule_tail_wrapper)
  * edi: kernel thread arg
  */
 .pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
-       call    schedule_tail_wrapper
+SYM_CODE_START(ret_from_fork_asm)
+       movl    %esp, %edx      /* regs */
 
-       testl   %ebx, %ebx
-       jnz     1f              /* kernel threads are uncommon */
+       /* return address for the stack unwinder */
+       pushl   $.Lsyscall_32_done
 
-2:
-       /* When we fork, we trace the syscall return in the child, too. */
-       movl    %esp, %eax
-       call    syscall_exit_to_user_mode
-       jmp     .Lsyscall_32_done
+       FRAME_BEGIN
+       /* prev already in EAX */
+       movl    %ebx, %ecx      /* fn */
+       pushl   %edi            /* fn_arg */
+       call    ret_from_fork
+       addl    $4, %esp
+       FRAME_END
 
-       /* kernel thread */
-1:     movl    %edi, %eax
-       CALL_NOSPEC ebx
-       /*
-        * A kernel thread is allowed to return here after successfully
-        * calling kernel_execve().  Exit to userspace to complete the execve()
-        * syscall.
-        */
-       movl    $0, PT_EAX(%esp)
-       jmp     2b
-SYM_CODE_END(ret_from_fork)
+       RET
+SYM_CODE_END(ret_from_fork_asm)
 .popsection
 
 SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
index f31e286..43606de 100644 (file)
@@ -284,36 +284,33 @@ SYM_FUNC_END(__switch_to_asm)
  * r12: kernel thread arg
  */
 .pushsection .text, "ax"
-       __FUNC_ALIGN
-SYM_CODE_START_NOALIGN(ret_from_fork)
+SYM_CODE_START(ret_from_fork_asm)
+       /*
+        * This is the start of the kernel stack; even through there's a
+        * register set at the top, the regset isn't necessarily coherent
+        * (consider kthreads) and one cannot unwind further.
+        *
+        * This ensures stack unwinds of kernel threads terminate in a known
+        * good state.
+        */
        UNWIND_HINT_END_OF_STACK
        ANNOTATE_NOENDBR // copy_thread
        CALL_DEPTH_ACCOUNT
-       movq    %rax, %rdi
-       call    schedule_tail                   /* rdi: 'prev' task parameter */
 
-       testq   %rbx, %rbx                      /* from kernel_thread? */
-       jnz     1f                              /* kernel threads are uncommon */
+       movq    %rax, %rdi              /* prev */
+       movq    %rsp, %rsi              /* regs */
+       movq    %rbx, %rdx              /* fn */
+       movq    %r12, %rcx              /* fn_arg */
+       call    ret_from_fork
 
-2:
-       UNWIND_HINT_REGS
-       movq    %rsp, %rdi
-       call    syscall_exit_to_user_mode       /* returns with IRQs disabled */
-       jmp     swapgs_restore_regs_and_return_to_usermode
-
-1:
-       /* kernel thread */
-       UNWIND_HINT_END_OF_STACK
-       movq    %r12, %rdi
-       CALL_NOSPEC rbx
        /*
-        * A kernel thread is allowed to return here after successfully
-        * calling kernel_execve().  Exit to userspace to complete the execve()
-        * syscall.
+        * Set the stack state to what is expected for the target function
+        * -- at this point the register set should be a valid user set
+        * and unwind should work normally.
         */
-       movq    $0, RAX(%rsp)
-       jmp     2b
-SYM_CODE_END(ret_from_fork)
+       UNWIND_HINT_REGS
+       jmp     swapgs_restore_regs_and_return_to_usermode
+SYM_CODE_END(ret_from_fork_asm)
 .popsection
 
 .macro DEBUG_ENTRY_ASSERT_IRQS_OFF
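Both the 32-bit and 64-bit stubs now collapse into a single C helper, called with prev, the register frame, and the optional kernel-thread fn/fn_arg pair. A sketch of what that helper plausibly does, reconstructed from the assembly deleted above (the prototypes and the pt_regs layout are assumptions, not quoted from the tree):

    struct task_struct;
    struct pt_regs { unsigned long ax; /* ... abridged ... */ };

    extern void schedule_tail(struct task_struct *prev);
    extern void syscall_exit_to_user_mode(struct pt_regs *regs);

    void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
                       int (*fn)(void *), void *fn_arg)
    {
            schedule_tail(prev);

            if (fn) {                     /* kernel thread path */
                    fn(fn_arg);
                    /*
                     * fn() may return after a successful kernel_execve();
                     * complete the execve() by returning 0 to user space.
                     */
                    regs->ax = 0;
            }

            syscall_exit_to_user_mode(regs);
    }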
index bc0a3c9..2d0b1bd 100644 (file)
 449    i386    futex_waitv             sys_futex_waitv
 450    i386    set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    i386    cachestat               sys_cachestat
+452    i386    fchmodat2               sys_fchmodat2
index 227538b..8147682 100644 (file)
 449    common  futex_waitv             sys_futex_waitv
 450    common  set_mempolicy_home_node sys_set_mempolicy_home_node
 451    common  cachestat               sys_cachestat
+452    common  fchmodat2               sys_fchmodat2
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
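All three tables wire fchmodat2() as number 452. Until a libc wrapper exists it can be reached through syscall(2); a hedged example (AT_SYMLINK_NOFOLLOW support is the reason the new call exists, but the exact argument order mirroring fchmodat() is an assumption here):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef __NR_fchmodat2
    #define __NR_fchmodat2 452            /* x86 number from the tables above */
    #endif

    int main(void)
    {
            long ret = syscall(__NR_fchmodat2, AT_FDCWD, "example.txt",
                               0644, AT_SYMLINK_NOFOLLOW);
            if (ret != 0)
                    perror("fchmodat2");
            return ret ? 1 : 0;
    }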
index 11a5c68..7645730 100644 (file)
@@ -299,8 +299,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
 
        /* Round the lowest possible end address up to a PMD boundary. */
        end = (start + len + PMD_SIZE - 1) & PMD_MASK;
-       if (end >= TASK_SIZE_MAX)
-               end = TASK_SIZE_MAX;
+       if (end >= DEFAULT_MAP_WINDOW)
+               end = DEFAULT_MAP_WINDOW;
        end -= len;
 
        if (end > start) {
index 3710148..6911c53 100644 (file)
@@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
         * count to the generic event atomically:
         */
        prev_raw_count = local64_read(&hwc->prev_count);
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       new_raw_count) != prev_raw_count)
+       if (!local64_try_cmpxchg(&hwc->prev_count,
+                                &prev_raw_count, new_raw_count))
                return 0;
 
        /*
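local64_try_cmpxchg() returns a boolean and, on failure, refreshes the expected value in place, which is why prev_raw_count is now passed by address and the explicit comparison against it is gone. The C11 analogue of those semantics:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool try_update(_Atomic int64_t *prev_count,
                           int64_t *prev_raw, int64_t new_raw)
    {
            /* On failure, *prev_raw is rewritten with the current value,
             * so a retry loop needs no separate reload. */
            return atomic_compare_exchange_strong(prev_count, prev_raw, new_raw);
    }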
@@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event)
        return -ENOENT;
 }
 
+/*
+ * Grouping of IBS events is not possible since IBS can have only
+ * one event active at any point in time.
+ */
+static int validate_group(struct perf_event *event)
+{
+       struct perf_event *sibling;
+
+       if (event->group_leader == event)
+               return 0;
+
+       if (event->group_leader->pmu == event->pmu)
+               return -EINVAL;
+
+       for_each_sibling_event(sibling, event->group_leader) {
+               if (sibling->pmu == event->pmu)
+                       return -EINVAL;
+       }
+       return 0;
+}
+
 static int perf_ibs_init(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
        struct perf_ibs *perf_ibs;
        u64 max_cnt, config;
+       int ret;
 
        perf_ibs = get_ibs_pmu(event->attr.type);
        if (!perf_ibs)
@@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event)
        if (config & ~perf_ibs->config_mask)
                return -EINVAL;
 
+       ret = validate_group(event);
+       if (ret)
+               return ret;
+
        if (hwc->sample_period) {
                if (config & perf_ibs->cnt_mask)
                        /* raw max_cnt may not be set */
@@ -702,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2)
        return op_data2->data_src_lo;
 }
 
-static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
-                                union ibs_op_data3 *op_data3,
-                                struct perf_sample_data *data)
+#define        L(x)            (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT))
+#define        LN(x)           PERF_MEM_S(LVLNUM, x)
+#define        REM             PERF_MEM_S(REMOTE, REMOTE)
+#define        HOPS(x)         PERF_MEM_S(HOPS, x)
+
+static u64 g_data_src[8] = {
+       [IBS_DATA_SRC_LOC_CACHE]          = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0),
+       [IBS_DATA_SRC_DRAM]               = L(LOC_RAM) | LN(RAM),
+       [IBS_DATA_SRC_REM_CACHE]          = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+       [IBS_DATA_SRC_IO]                 = L(IO) | LN(IO),
+};
+
+#define RMT_NODE_BITS                  (1 << IBS_DATA_SRC_DRAM)
+#define RMT_NODE_APPLICABLE(x)         (RMT_NODE_BITS & (1 << x))
+
+static u64 g_zen4_data_src[32] = {
+       [IBS_DATA_SRC_EXT_LOC_CACHE]      = L(L3) | LN(L3),
+       [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0),
+       [IBS_DATA_SRC_EXT_DRAM]           = L(LOC_RAM) | LN(RAM),
+       [IBS_DATA_SRC_EXT_FAR_CCX_CACHE]  = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1),
+       [IBS_DATA_SRC_EXT_PMEM]           = LN(PMEM),
+       [IBS_DATA_SRC_EXT_IO]             = L(IO) | LN(IO),
+       [IBS_DATA_SRC_EXT_EXT_MEM]        = LN(CXL),
+};
+
+#define ZEN4_RMT_NODE_BITS             ((1 << IBS_DATA_SRC_EXT_DRAM) | \
+                                        (1 << IBS_DATA_SRC_EXT_PMEM) | \
+                                        (1 << IBS_DATA_SRC_EXT_EXT_MEM))
+#define ZEN4_RMT_NODE_APPLICABLE(x)    (ZEN4_RMT_NODE_BITS & (1 << x))
+
+static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
+                                 union ibs_op_data3 *op_data3,
+                                 struct perf_sample_data *data)
 {
        union perf_mem_data_src *data_src = &data->data_src;
        u8 ibs_data_src = perf_ibs_data_src(op_data2);
 
        data_src->mem_lvl = 0;
+       data_src->mem_lvl_num = 0;
 
        /*
         * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
         * memory accesses. So, check DcUcMemAcc bit early.
         */
-       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) {
-               data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
-               return;
-       }
+       if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO)
+               return L(UNC) | LN(UNC);
 
        /* L1 Hit */
-       if (op_data3->dc_miss == 0) {
-               data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
-               return;
-       }
+       if (op_data3->dc_miss == 0)
+               return L(L1) | LN(L1);
 
        /* L2 Hit */
        if (op_data3->l2_miss == 0) {
                /* Erratum #1293 */
                if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
-                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) {
-                       data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
-                       return;
-               }
+                   !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc))
+                       return L(L2) | LN(L2);
        }
 
        /*
@@ -743,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2,
        if (data_src->mem_op != PERF_MEM_OP_LOAD)
                goto check_mab;
 
-       /* L3 Hit */
        if (ibs_caps & IBS_CAPS_ZEN4) {
-               if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
-                       return;
-               }
-       } else {
-               if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
-                                           PERF_MEM_LVL_HIT;
-                       return;
-               }
-       }
+               u64 val = g_zen4_data_src[ibs_data_src];
 
-       /* A peer cache in a near CCX */
-       if (ibs_caps & IBS_CAPS_ZEN4 &&
-           ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
-               data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
-               return;
-       }
+               if (!val)
+                       goto check_mab;
 
-       /* A peer cache in a far CCX */
-       if (ibs_caps & IBS_CAPS_ZEN4) {
-               if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-                       return;
+               /* HOPS_1 because IBS doesn't provide remote socket detail */
+               if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) {
+                       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM)
+                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+                       else
+                               val |= REM | HOPS(1);
                }
-       } else {
-               if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
-                       data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
-                       return;
-               }
-       }
 
-       /* DRAM */
-       if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
-               if (op_data2->rmt_node == 0)
-                       data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
-               else
-                       data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
-               return;
-       }
+               return val;
+       } else {
+               u64 val = g_data_src[ibs_data_src];
 
-       /* PMEM */
-       if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
-               data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
-               if (op_data2->rmt_node) {
-                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-                       /* IBS doesn't provide Remote socket detail */
-                       data_src->mem_hops = PERF_MEM_HOPS_1;
-               }
-               return;
-       }
+               if (!val)
+                       goto check_mab;
 
-       /* Extension Memory */
-       if (ibs_caps & IBS_CAPS_ZEN4 &&
-           ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
-               data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL;
-               if (op_data2->rmt_node) {
-                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-                       /* IBS doesn't provide Remote socket detail */
-                       data_src->mem_hops = PERF_MEM_HOPS_1;
+               /* HOPS_1 because IBS doesn't provide remote socket detail */
+               if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) {
+                       if (ibs_data_src == IBS_DATA_SRC_DRAM)
+                               val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1);
+                       else
+                               val |= REM | HOPS(1);
                }
-               return;
-       }
 
-       /* IO */
-       if (ibs_data_src == IBS_DATA_SRC_EXT_IO) {
-               data_src->mem_lvl = PERF_MEM_LVL_IO;
-               data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
-               if (op_data2->rmt_node) {
-                       data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
-                       /* IBS doesn't provide Remote socket detail */
-                       data_src->mem_hops = PERF_MEM_HOPS_1;
-               }
-               return;
+               return val;
        }
 
 check_mab:
@@ -829,12 +834,11 @@ check_mab:
         * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
         * MAB only when IBS fails to provide DataSrc.
         */
-       if (op_data3->dc_miss_no_mab_alloc) {
-               data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
-               return;
-       }
+       if (op_data3->dc_miss_no_mab_alloc)
+               return L(LFB) | LN(LFB);
 
-       data_src->mem_lvl = PERF_MEM_LVL_NA;
+       /* Don't set HIT with NA */
+       return PERF_MEM_S(LVL, NA) | LN(NA);
 }
 
 static bool perf_ibs_cache_hit_st_valid(void)
@@ -924,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data,
                                  union ibs_op_data2 *op_data2,
                                  union ibs_op_data3 *op_data3)
 {
-       perf_ibs_get_mem_lvl(op_data2, op_data3, data);
+       union perf_mem_data_src *data_src = &data->data_src;
+
+       data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data);
        perf_ibs_get_mem_snoop(op_data2, data);
        perf_ibs_get_tlb_lvl(op_data3, data);
        perf_ibs_get_mem_lock(op_data3, data);
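
The rewrite above replaces the long if/else ladder with the g_data_src/g_zen4_data_src lookup tables: each slot pre-encodes the PERF_MEM_S() fields for one IBS DataSrc value, the rmt_node case is patched in afterwards, and perf_ibs_get_mem_lvl() now returns a u64 that perf_ibs_get_data_src() ORs into data_src->val; zeroed table slots fall through to the MAB check. Below is a minimal userspace sketch of how one such pre-encoded value decodes, using PERF_MEM_S() and union perf_mem_data_src from the uapi <linux/perf_event.h> (PERF_MEM_HOPS_1 needs a v5.16+ header); the value mirrors the remote-DRAM fixup above:

	#include <stdio.h>
	#include <linux/perf_event.h>

	int main(void)
	{
		/* L(REM_RAM1) | LN(RAM) | REM | HOPS(1) in the table's macros */
		union perf_mem_data_src ds = { .val =
			PERF_MEM_S(LVL, REM_RAM1) | PERF_MEM_S(LVL, HIT) |
			PERF_MEM_S(LVLNUM, RAM) |
			PERF_MEM_S(REMOTE, REMOTE) | PERF_MEM_S(HOPS, 1) };

		printf("lvl=%#x lvl_num=%#x remote=%u hops=%u\n",
		       (unsigned)ds.mem_lvl, (unsigned)ds.mem_lvl_num,
		       (unsigned)ds.mem_remote, (unsigned)ds.mem_hops);
		return 0;
	}
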
index 9d24870..185f902 100644 (file)
@@ -129,13 +129,11 @@ u64 x86_perf_event_update(struct perf_event *event)
         * exchange a new raw count - then add that new-prev delta
         * count to the generic event atomically:
         */
-again:
        prev_raw_count = local64_read(&hwc->prev_count);
-       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                                       new_raw_count) != prev_raw_count)
-               goto again;
+       do {
+               rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+       } while (!local64_try_cmpxchg(&hwc->prev_count,
+                                     &prev_raw_count, new_raw_count));
 
        /*
         * Now we have the new raw value and have updated the prev
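
This hunk, like the matching conversions in the cstate and msr PMU drivers further down, replaces the open-coded again:/local64_cmpxchg()/goto loop with local64_try_cmpxchg(), which on failure writes the value it actually observed back through its 'old' pointer, so only the counter read is retried; on x86 it also lets the compiler consume the flags output of cmpxchg instead of doing a separate comparison. A standalone C11 sketch of the same idiom (read_counter() is a hypothetical stand-in for rdpmcl()):

	#include <stdatomic.h>
	#include <stdio.h>

	static _Atomic long prev_count;

	/* Hypothetical counter read; stands in for rdpmcl(). */
	static long read_counter(void) { return 42; }

	static long update(void)
	{
		long prev = atomic_load(&prev_count);
		long new;

		do {
			new = read_counter();
			/* On failure, compare_exchange reloads 'prev' for
			 * us, just as local64_try_cmpxchg() updates its
			 * 'old' argument -- no explicit re-read, no goto. */
		} while (!atomic_compare_exchange_weak(&prev_count, &prev, new));

		return new - prev;	/* delta since the last update */
	}

	int main(void)
	{
		printf("delta=%ld\n", update());
		return 0;
	}
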
@@ -2168,7 +2166,6 @@ static int __init init_hw_perf_events(void)
                        hybrid_pmu->pmu = pmu;
                        hybrid_pmu->pmu.type = -1;
                        hybrid_pmu->pmu.attr_update = x86_pmu.attr_update;
-                       hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS;
                        hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE;
 
                        err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name,
index a149faf..fa355d3 100644 (file)
@@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = {
        EVENT_EXTRA_END
 };
 
+EVENT_ATTR_STR(topdown-retiring,       td_retiring_cmt,        "event=0x72,umask=0x0");
+EVENT_ATTR_STR(topdown-bad-spec,       td_bad_spec_cmt,        "event=0x73,umask=0x0");
+
+static struct attribute *cmt_events_attrs[] = {
+       EVENT_PTR(td_fe_bound_tnt),
+       EVENT_PTR(td_retiring_cmt),
+       EVENT_PTR(td_bad_spec_cmt),
+       EVENT_PTR(td_be_bound_tnt),
+       NULL
+};
+
 static struct extra_reg intel_cmt_extra_regs[] __read_mostly = {
        /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
        INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0),
@@ -3993,6 +4004,13 @@ static int intel_pmu_hw_config(struct perf_event *event)
                struct perf_event *leader = event->group_leader;
                struct perf_event *sibling = NULL;
 
+               /*
+                * When this mem-loads event is also the group leader (no
+                * group exists yet), there is no aux event before it.
+                */
+               if (leader == event)
+                       return -ENODATA;
+
                if (!is_mem_loads_aux_event(leader)) {
                        for_each_sibling_event(sibling, leader) {
                                if (is_mem_loads_aux_event(sibling))
@@ -4840,6 +4858,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15");
 
 PMU_FORMAT_ATTR(frontend, "config1:0-23");
 
+PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63");
+
 static struct attribute *intel_arch3_formats_attr[] = {
        &format_attr_event.attr,
        &format_attr_umask.attr,
@@ -4870,6 +4890,13 @@ static struct attribute *slm_format_attr[] = {
        NULL
 };
 
+static struct attribute *cmt_format_attr[] = {
+       &format_attr_offcore_rsp.attr,
+       &format_attr_ldlat.attr,
+       &format_attr_snoop_rsp.attr,
+       NULL
+};
+
 static struct attribute *skl_format_attr[] = {
        &format_attr_frontend.attr,
        NULL,
@@ -5649,7 +5676,6 @@ static struct attribute *adl_hybrid_extra_attr[] = {
        NULL
 };
 
-PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63");
 FORMAT_ATTR_HYBRID(snoop_rsp,  hybrid_small);
 
 static struct attribute *mtl_hybrid_extra_attr_rtm[] = {
@@ -6167,7 +6193,7 @@ __init int intel_pmu_init(void)
                name = "Tremont";
                break;
 
-       case INTEL_FAM6_ALDERLAKE_N:
+       case INTEL_FAM6_ATOM_GRACEMONT:
                x86_pmu.mid_ack = true;
                memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
                       sizeof(hw_cache_event_ids));
@@ -6197,6 +6223,37 @@ __init int intel_pmu_init(void)
                name = "gracemont";
                break;
 
+       case INTEL_FAM6_ATOM_CRESTMONT:
+       case INTEL_FAM6_ATOM_CRESTMONT_X:
+               x86_pmu.mid_ack = true;
+               memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+                      sizeof(hw_cache_event_ids));
+               memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+                      sizeof(hw_cache_extra_regs));
+               hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+               x86_pmu.event_constraints = intel_slm_event_constraints;
+               x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints;
+               x86_pmu.extra_regs = intel_cmt_extra_regs;
+
+               x86_pmu.pebs_aliases = NULL;
+               x86_pmu.pebs_prec_dist = true;
+               x86_pmu.lbr_pt_coexist = true;
+               x86_pmu.pebs_block = true;
+               x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+               x86_pmu.flags |= PMU_FL_INSTR_LATENCY;
+
+               intel_pmu_pebs_data_source_cmt();
+               x86_pmu.pebs_latency_data = mtl_latency_data_small;
+               x86_pmu.get_event_constraints = cmt_get_event_constraints;
+               x86_pmu.limit_period = spr_limit_period;
+               td_attr = cmt_events_attrs;
+               mem_attr = grt_mem_attrs;
+               extra_attr = cmt_format_attr;
+               pr_cont("Crestmont events, ");
+               name = "crestmont";
+               break;
+
        case INTEL_FAM6_WESTMERE:
        case INTEL_FAM6_WESTMERE_EP:
        case INTEL_FAM6_WESTMERE_EX:
index 835862c..96fffb2 100644 (file)
@@ -365,13 +365,11 @@ static void cstate_pmu_event_update(struct perf_event *event)
        struct hw_perf_event *hwc = &event->hw;
        u64 prev_raw_count, new_raw_count;
 
-again:
        prev_raw_count = local64_read(&hwc->prev_count);
-       new_raw_count = cstate_pmu_read_counter(event);
-
-       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
-                           new_raw_count) != prev_raw_count)
-               goto again;
+       do {
+               new_raw_count = cstate_pmu_read_counter(event);
+       } while (!local64_try_cmpxchg(&hwc->prev_count,
+                                     &prev_raw_count, new_raw_count));
 
        local64_add(new_raw_count - prev_raw_count, &event->count);
 }
@@ -671,6 +669,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &glm_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,        &glm_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      &glm_cstates),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      &adl_cstates),
 
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,             &icl_cstates),
@@ -686,7 +685,6 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &icl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_cstates),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &adl_cstates),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,        &adl_cstates),
index df88576..eb8dd8b 100644 (file)
@@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void)
        __intel_pmu_pebs_data_source_grt(data_source);
 }
 
-static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source)
+static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source)
 {
        data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD);
        data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM);
@@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void)
 
        data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
        memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
-       intel_pmu_pebs_data_source_cmt(data_source);
+       __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
+void __init intel_pmu_pebs_data_source_cmt(void)
+{
+       __intel_pmu_pebs_data_source_cmt(pebs_data_source);
 }
 
 static u64 precise_store_data(u64 status)
index bc22660..69043e0 100644 (file)
@@ -1858,7 +1858,6 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rkl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &adl_uncore_init),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &adl_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,        &adl_uncore_init),
@@ -1867,6 +1866,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &spr_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,     &spr_uncore_init),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      &snr_uncore_init),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      &adl_uncore_init),
        {},
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
index d49e90d..4d34998 100644 (file)
@@ -1502,7 +1502,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
 
        pci_dev_put(ubox_dev);
 
-       return err ? pcibios_err_to_errno(err) : 0;
+       return pcibios_err_to_errno(err);
 }
 
 int snbep_uncore_pci_init(void)
index 0feaaa5..9e237b3 100644 (file)
@@ -106,7 +106,7 @@ static bool test_intel(int idx, void *data)
        case INTEL_FAM6_ROCKETLAKE:
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
-       case INTEL_FAM6_ALDERLAKE_N:
+       case INTEL_FAM6_ATOM_GRACEMONT:
        case INTEL_FAM6_RAPTORLAKE:
        case INTEL_FAM6_RAPTORLAKE_P:
        case INTEL_FAM6_RAPTORLAKE_S:
@@ -244,12 +244,10 @@ static void msr_event_update(struct perf_event *event)
        s64 delta;
 
        /* Careful, an NMI might modify the previous event value: */
-again:
        prev = local64_read(&event->hw.prev_count);
-       now = msr_read_counter(event);
-
-       if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev)
-               goto again;
+       do {
+               now = msr_read_counter(event);
+       } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, now));
 
        delta = now - prev;
        if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) {
index d6de448..c8ba2be 100644 (file)
@@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void);
 
 void intel_pmu_pebs_data_source_mtl(void);
 
+void intel_pmu_pebs_data_source_cmt(void);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
index 52e6e7e..1579429 100644 (file)
@@ -804,7 +804,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           &model_skl),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &model_skl),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &model_skl),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &model_skl),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      &model_skl),
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &model_spr),
        X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,     &model_spr),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &model_skl),
index 1fbda2f..b21335e 100644 (file)
@@ -107,7 +107,6 @@ static bool cpu_is_self(int cpu)
 static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
                bool exclude_self)
 {
-       struct hv_send_ipi_ex **arg;
        struct hv_send_ipi_ex *ipi_arg;
        unsigned long flags;
        int nr_bank = 0;
@@ -117,9 +116,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
                return false;
 
        local_irq_save(flags);
-       arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+       ipi_arg = *this_cpu_ptr(hyperv_pcpu_input_arg);
 
-       ipi_arg = *arg;
        if (unlikely(!ipi_arg))
                goto ipi_mask_ex_done;
 
index 6c04b52..953e280 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 #include <asm/sev.h>
+#include <asm/ibt.h>
 #include <asm/hypervisor.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
@@ -472,6 +473,26 @@ void __init hyperv_init(void)
        }
 
        /*
+        * Some versions of Hyper-V that provide IBT in guest VMs have a bug
+        * in that there's no ENDBR64 instruction at the entry to the
+        * hypercall page. Because hypercalls are invoked via an indirect call
+        * to the hypercall page, all hypercall attempts fail when IBT is
+        * enabled, and Linux panics. For such buggy versions, disable IBT.
+        *
+        * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall
+        * page, so if future Linux kernel versions enable IBT for 32-bit
+        * builds, additional hypercall page hackery will be required here
+        * to provide an ENDBR32.
+        */
+#ifdef CONFIG_X86_KERNEL_IBT
+       if (cpu_feature_enabled(X86_FEATURE_IBT) &&
+           *(u32 *)hv_hypercall_pg != gen_endbr()) {
+               setup_clear_cpu_cap(X86_FEATURE_IBT);
+               pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n");
+       }
+#endif
+
+       /*
         * hyperv_init() is called before LAPIC is initialized: see
         * apic_intr_mode_init() -> x86_platform.apic_post_init() and
         * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER
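
The IBT check added above compares the first 32-bit word of the hypercall page with gen_endbr(), i.e. the ENDBR64 encoding. A rough userspace sketch of the same test applied to one of our own functions: 0xfa1e0ff3 is ENDBR64 (bytes f3 0f 1e fa) read as a little-endian u32; build with -fcf-protection=branch on x86-64 for check() to actually begin with it:

	#include <stdio.h>
	#include <string.h>
	#include <stdint.h>

	__attribute__((noinline)) static int check(void) { return 0; }

	int main(void)
	{
		uint32_t insn;

		/* Read the first instruction word of check(). */
		memcpy(&insn, (void *)check, sizeof(insn));
		printf("first insn %#x -> %s\n", insn,
		       insn == 0xfa1e0ff3 ? "ENDBR64" : "no ENDBR64");
		return check();
	}
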
index 85d38b9..db5d2ea 100644 (file)
@@ -25,6 +25,10 @@ void __init hv_vtl_init_platform(void)
        x86_init.irqs.pre_vector_init = x86_init_noop;
        x86_init.timers.timer_init = x86_init_noop;
 
+       /* Avoid searching for BIOS MP tables */
+       x86_init.mpparse.find_smp_config = x86_init_noop;
+       x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+
        x86_platform.get_wallclock = get_rtc_noop;
        x86_platform.set_wallclock = set_rtc_noop;
        x86_platform.get_nmi_reason = hv_get_nmi_reason;
index 14f46ad..28be6df 100644 (file)
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
 static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
                           enum hv_mem_host_visibility visibility)
 {
-       struct hv_gpa_range_for_visibility **input_pcpu, *input;
+       struct hv_gpa_range_for_visibility *input;
        u16 pages_processed;
        u64 hv_status;
        unsigned long flags;
@@ -263,9 +263,8 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
        }
 
        local_irq_save(flags);
-       input_pcpu = (struct hv_gpa_range_for_visibility **)
-                       this_cpu_ptr(hyperv_pcpu_input_arg);
-       input = *input_pcpu;
+       input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
        if (unlikely(!input)) {
                local_irq_restore(flags);
                return -EINVAL;
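
hv_mark_gpa_visibility() here, __send_ipi_mask_ex() earlier, and the TLB/guest-mapping flush helpers below all get the same cleanup: hyperv_pcpu_input_arg is a per-cpu slot holding the pointer to the preallocated hypercall input page, and *this_cpu_ptr() already yields that pointer as a void *, which converts to the typed pointer without the intermediate double-pointer local and its cast. A userspace analogue with a thread-local slot standing in for the per-cpu variable (this_cpu_ptr_sim() and the struct are illustrative only):

	#include <stdio.h>

	struct hv_send_ipi_ex { int vector; };

	/* Per-thread slot holding a pointer, like the per-cpu variable. */
	static _Thread_local void *pcpu_input_arg;

	static void **this_cpu_ptr_sim(void) { return &pcpu_input_arg; }

	int main(void)
	{
		static struct hv_send_ipi_ex page;

		pcpu_input_arg = &page;

		/* Old style: cast the slot's address to a typed double
		 * pointer, then dereference in a second step. */
		struct hv_send_ipi_ex **arg =
			(struct hv_send_ipi_ex **)this_cpu_ptr_sim();
		struct hv_send_ipi_ex *a = *arg;

		/* New style: one dereference; void * converts implicitly. */
		struct hv_send_ipi_ex *b = *this_cpu_ptr_sim();

		printf("same pointer: %d\n", a == b);
		return 0;
	}
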
index 8460bd3..1cc1132 100644 (file)
@@ -61,7 +61,6 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
                                   const struct flush_tlb_info *info)
 {
        int cpu, vcpu, gva_n, max_gvas;
-       struct hv_tlb_flush **flush_pcpu;
        struct hv_tlb_flush *flush;
        u64 status;
        unsigned long flags;
@@ -74,10 +73,7 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
 
        local_irq_save(flags);
 
-       flush_pcpu = (struct hv_tlb_flush **)
-                    this_cpu_ptr(hyperv_pcpu_input_arg);
-
-       flush = *flush_pcpu;
+       flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
 
        if (unlikely(!flush)) {
                local_irq_restore(flags);
@@ -178,17 +174,13 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                      const struct flush_tlb_info *info)
 {
        int nr_bank = 0, max_gvas, gva_n;
-       struct hv_tlb_flush_ex **flush_pcpu;
        struct hv_tlb_flush_ex *flush;
        u64 status;
 
        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                return HV_STATUS_INVALID_PARAMETER;
 
-       flush_pcpu = (struct hv_tlb_flush_ex **)
-                    this_cpu_ptr(hyperv_pcpu_input_arg);
-
-       flush = *flush_pcpu;
+       flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
 
        if (info->mm) {
                /*
index 5d70968..9dc259f 100644 (file)
@@ -19,7 +19,6 @@
 
 int hyperv_flush_guest_mapping(u64 as)
 {
-       struct hv_guest_mapping_flush **flush_pcpu;
        struct hv_guest_mapping_flush *flush;
        u64 status;
        unsigned long flags;
@@ -30,10 +29,7 @@ int hyperv_flush_guest_mapping(u64 as)
 
        local_irq_save(flags);
 
-       flush_pcpu = (struct hv_guest_mapping_flush **)
-               this_cpu_ptr(hyperv_pcpu_input_arg);
-
-       flush = *flush_pcpu;
+       flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
 
        if (unlikely(!flush)) {
                local_irq_restore(flags);
@@ -90,7 +86,6 @@ EXPORT_SYMBOL_GPL(hyperv_fill_flush_guest_mapping_list);
 int hyperv_flush_guest_mapping_range(u64 as,
                hyperv_fill_flush_list_func fill_flush_list_func, void *data)
 {
-       struct hv_guest_mapping_flush_list **flush_pcpu;
        struct hv_guest_mapping_flush_list *flush;
        u64 status;
        unsigned long flags;
@@ -102,10 +97,8 @@ int hyperv_flush_guest_mapping_range(u64 as,
 
        local_irq_save(flags);
 
-       flush_pcpu = (struct hv_guest_mapping_flush_list **)
-               this_cpu_ptr(hyperv_pcpu_input_arg);
+       flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
 
-       flush = *flush_pcpu;
        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto fault;
index 8eb74cf..c8a7fc2 100644 (file)
@@ -6,7 +6,7 @@
  *  Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
  *  Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
  */
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
 
 #include <asm/numa.h>
 #include <asm/fixmap.h>
@@ -15,6 +15,7 @@
 #include <asm/mpspec.h>
 #include <asm/x86_init.h>
 #include <asm/cpufeature.h>
+#include <asm/irq_vectors.h>
 
 #ifdef CONFIG_ACPI_APEI
 # include <asm/pgtable_types.h>
@@ -31,6 +32,7 @@ extern int acpi_skip_timer_override;
 extern int acpi_use_timer_override;
 extern int acpi_fix_pin2_polarity;
 extern int acpi_disable_cmcff;
+extern bool acpi_int_src_ovr[NR_IRQS_LEGACY];
 
 extern u8 acpi_sci_flags;
 extern u32 acpi_sci_override_gsi;
@@ -100,23 +102,31 @@ static inline bool arch_has_acpi_pdc(void)
                c->x86_vendor == X86_VENDOR_CENTAUR);
 }
 
-static inline void arch_acpi_set_pdc_bits(u32 *buf)
+static inline void arch_acpi_set_proc_cap_bits(u32 *cap)
 {
        struct cpuinfo_x86 *c = &cpu_data(0);
 
-       buf[2] |= ACPI_PDC_C_CAPABILITY_SMP;
+       *cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP;
+
+       /* Enable coordination with firmware's _TSD info */
+       *cap |= ACPI_PROC_CAP_SMP_T_SWCOORD;
 
        if (cpu_has(c, X86_FEATURE_EST))
-               buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
+               *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP;
 
        if (cpu_has(c, X86_FEATURE_ACPI))
-               buf[2] |= ACPI_PDC_T_FFH;
+               *cap |= ACPI_PROC_CAP_T_FFH;
+
+       if (cpu_has(c, X86_FEATURE_HWP))
+               *cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF;
 
        /*
-        * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+        * If mwait/monitor is unsupported, C_C1_FFH and
+        * C2/C3_FFH will be disabled.
         */
-       if (!cpu_has(c, X86_FEATURE_MWAIT))
-               buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+       if (!cpu_has(c, X86_FEATURE_MWAIT) ||
+           boot_option_idle_override == IDLE_NOMWAIT)
+               *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH);
 }
 
 static inline bool acpi_has_cpu_in_madt(void)
index 6c15a62..9c4da69 100644 (file)
@@ -96,7 +96,7 @@ extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 extern void apply_retpolines(s32 *start, s32 *end);
 extern void apply_returns(s32 *start, s32 *end);
-extern void apply_ibt_endbr(s32 *start, s32 *end);
+extern void apply_seal_endbr(s32 *start, s32 *end);
 extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
                          s32 *start_cfi, s32 *end_cfi);
 
index 9191280..4ae1433 100644 (file)
 # define BOOT_STACK_SIZE       0x1000
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned int output_len;
+extern const unsigned long kernel_total_size;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+                               void (*error)(char *x));
+#endif
+
 #endif /* _ASM_X86_BOOT_H */
index cb8ca46..b69b0d7 100644 (file)
@@ -14,7 +14,7 @@
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS                       21         /* N 32-bit words worth of info */
-#define NBUGINTS                       1          /* N 32-bit bug flags */
+#define NBUGINTS                       2          /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
 #define X86_FEATURE_SMBA               (11*32+21) /* "" Slow Memory Bandwidth Allocation */
 #define X86_FEATURE_BMEC               (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
 
+#define X86_FEATURE_SRSO               (11*32+24) /* "" AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS         (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT     (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16                (12*32+ 5) /* AVX512 BFLOAT16 instructions */
 #define X86_FEATURE_AUTOIBRS           (20*32+ 8) /* "" Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR     (20*32+ 9) /* "" SMM_CTL MSR is not present */
 
+#define X86_FEATURE_SBPB               (20*32+27) /* "" Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE                (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO            (20*32+29) /* "" CPU is not affected by SRSO */
+
 /*
  * BUG word(s)
  */
 #define X86_BUG_RETBLEED               X86_BUG(27) /* CPU is affected by RETBleed */
 #define X86_BUG_EIBRS_PBRSB            X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
 #define X86_BUG_SMT_RSB                        X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS                    X86_BUG(30) /* CPU is affected by Gather Data Sampling */
 
+/* BUG word 2 */
+#define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0                   X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #endif /* _ASM_X86_CPUFEATURES_H */
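
Bumping NBUGINTS to 2 is what makes room for SRSO and DIV0: X86_BUG() numbers are allocated past the NCAPINTS feature words, so 1*32 + n selects bit n of the second bug word. A quick sketch of the arithmetic (X86_BUG() as defined in cpufeatures.h):

	#include <stdio.h>

	#define NCAPINTS	21	/* N 32-bit words worth of info */
	#define NBUGINTS	2	/* N 32-bit bug flags */
	#define X86_BUG(x)	(NCAPINTS * 32 + (x))

	int main(void)
	{
		unsigned int srso = X86_BUG(1 * 32 + 0);	/* X86_BUG_SRSO */

		/* Overall cpufeature bit, then its bug-word coordinates. */
		printf("cap bit %u -> bug word %u, bit %u\n",
		       srso, (srso - NCAPINTS * 32) / 32, srso % 32);
		return 0;
	}
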
index b8f1dc0..9931e4c 100644 (file)
@@ -71,6 +71,12 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
 }
 #define mul_u32_u32 mul_u32_u32
 
+/*
+ * __div64_32() is never called on x86, so prevent the
+ * generic definition from getting built.
+ */
+#define __div64_32
+
 #else
 # include <asm-generic/div64.h>
 
index 8b4be7c..b0994ae 100644 (file)
@@ -90,6 +90,8 @@ static inline void efi_fpu_end(void)
 }
 
 #ifdef CONFIG_X86_32
+#define EFI_X86_KERNEL_ALLOC_LIMIT             (SZ_512M - 1)
+
 #define arch_efi_call_virt_setup()                                     \
 ({                                                                     \
        efi_fpu_begin();                                                \
@@ -103,8 +105,7 @@ static inline void efi_fpu_end(void)
 })
 
 #else /* !CONFIG_X86_32 */
-
-#define EFI_LOADER_SIGNATURE   "EL64"
+#define EFI_X86_KERNEL_ALLOC_LIMIT             EFI_ALLOC_LIMIT
 
 extern asmlinkage u64 __efi_call(void *fp, ...);
 
@@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
 
 #ifdef CONFIG_EFI_MIXED
 
+#define EFI_ALLOC_LIMIT                (efi_is_64bit() ? ULONG_MAX : U32_MAX)
+
 #define ARCH_HAS_EFISTUB_WRAPPERS
 
 static inline bool efi_is_64bit(void)
index 1179038..ce8f501 100644 (file)
@@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
 static __always_inline void arch_exit_to_user_mode(void)
 {
        mds_user_clear_cpu_buffers();
+       amd_clear_divider();
 }
 #define arch_exit_to_user_mode arch_exit_to_user_mode
 
index baae6b4..1e59581 100644 (file)
@@ -34,7 +34,7 @@
 /*
  * Create a dummy function pointer reference to prevent objtool from marking
  * the function as needing to be "sealed" (i.e. ENDBR converted to NOP by
- * apply_ibt_endbr()).
+ * apply_seal_endbr()).
  */
 #define IBT_NOSEAL(fname)                              \
        ".pushsection .discard.ibt_endbr_noseal\n\t"    \
index b3af2d4..5fcd85f 100644 (file)
@@ -98,8 +98,6 @@
 #define INTEL_FAM6_ICELAKE_L           0x7E    /* Sunny Cove */
 #define INTEL_FAM6_ICELAKE_NNPI                0x9D    /* Sunny Cove */
 
-#define INTEL_FAM6_LAKEFIELD           0x8A    /* Sunny Cove / Tremont */
-
 #define INTEL_FAM6_ROCKETLAKE          0xA7    /* Cypress Cove */
 
 #define INTEL_FAM6_TIGERLAKE_L         0x8C    /* Willow Cove */
 #define INTEL_FAM6_GRANITERAPIDS_X     0xAD
 #define INTEL_FAM6_GRANITERAPIDS_D     0xAE
 
+/* "Hybrid" Processors (P-Core/E-Core) */
+
+#define INTEL_FAM6_LAKEFIELD           0x8A    /* Sunny Cove / Tremont */
+
 #define INTEL_FAM6_ALDERLAKE           0x97    /* Golden Cove / Gracemont */
 #define INTEL_FAM6_ALDERLAKE_L         0x9A    /* Golden Cove / Gracemont */
-#define INTEL_FAM6_ALDERLAKE_N         0xBE
 
-#define INTEL_FAM6_RAPTORLAKE          0xB7
+#define INTEL_FAM6_RAPTORLAKE          0xB7    /* Raptor Cove / Enhanced Gracemont */
 #define INTEL_FAM6_RAPTORLAKE_P                0xBA
 #define INTEL_FAM6_RAPTORLAKE_S                0xBF
 
 #define INTEL_FAM6_METEORLAKE          0xAC
 #define INTEL_FAM6_METEORLAKE_L                0xAA
 
-#define INTEL_FAM6_LUNARLAKE_M         0xBD
-
 #define INTEL_FAM6_ARROWLAKE           0xC6
 
+#define INTEL_FAM6_LUNARLAKE_M         0xBD
+
 /* "Small Core" Processors (Atom/E-Core) */
 
 #define INTEL_FAM6_ATOM_BONNELL                0x1C /* Diamondville, Pineview */
 #define INTEL_FAM6_ATOM_TREMONT                0x96 /* Elkhart Lake */
 #define INTEL_FAM6_ATOM_TREMONT_L      0x9C /* Jasper Lake */
 
-#define INTEL_FAM6_SIERRAFOREST_X      0xAF
+#define INTEL_FAM6_ATOM_GRACEMONT      0xBE /* Alderlake N */
 
-#define INTEL_FAM6_GRANDRIDGE          0xB6
+#define INTEL_FAM6_ATOM_CRESTMONT_X    0xAF /* Sierra Forest */
+#define INTEL_FAM6_ATOM_CRESTMONT      0xB6 /* Grand Ridge */
 
 /* Xeon Phi */
 
index 13bc212..e3054e3 100644 (file)
@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
 KVM_X86_OP(get_cpl)
 KVM_X86_OP(set_segment)
 KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
 KVM_X86_OP(set_cr0)
 KVM_X86_OP_OPTIONAL(post_set_cr3)
 KVM_X86_OP(is_valid_cr4)
index 28bd383..3bc146d 100644 (file)
@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
        void (*set_segment)(struct kvm_vcpu *vcpu,
                            struct kvm_segment *var, int seg);
        void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+       bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
        void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
        void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
-       bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+       bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
        void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
        int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
        void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
index 0953aa3..97a3de7 100644 (file)
@@ -21,7 +21,7 @@
 #define FUNCTION_PADDING
 #endif
 
-#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO)
+#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
 # define __FUNC_ALIGN          __ALIGN; FUNCTION_PADDING
 #else
 # define __FUNC_ALIGN          __ALIGN
index 56d4ef6..635132a 100644 (file)
@@ -127,8 +127,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new)
 
 static inline bool local_try_cmpxchg(local_t *l, long *old, long new)
 {
-       typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old;
-       return try_cmpxchg_local(&l->a.counter, __old, new);
+       return try_cmpxchg_local(&l->a.counter,
+                                (typeof(l->a.counter) *) old, new);
 }
 
 /* Always has a lock prefix */
index 7f97a8a..473b16d 100644 (file)
@@ -50,8 +50,8 @@ void __init sme_enable(struct boot_params *bp);
 
 int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
 int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
-void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
-                                           bool enc);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr,
+                                           unsigned long size, bool enc);
 
 void __init mem_encrypt_free_decrypted_mem(void);
 
@@ -85,7 +85,7 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
 static inline int __init
 early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
 static inline void __init
-early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {}
 
 static inline void mem_encrypt_free_decrypted_mem(void) { }
 
index 320566a..bbbe9d7 100644 (file)
 #ifndef _ASM_X86_MICROCODE_H
 #define _ASM_X86_MICROCODE_H
 
-#include <asm/cpu.h>
-#include <linux/earlycpio.h>
-#include <linux/initrd.h>
-
-struct ucode_patch {
-       struct list_head plist;
-       void *data;             /* Intel uses only this one */
-       unsigned int size;
-       u32 patch_id;
-       u16 equiv_cpu;
-};
-
-extern struct list_head microcode_cache;
-
 struct cpu_signature {
        unsigned int sig;
        unsigned int pf;
        unsigned int rev;
 };
 
-struct device;
-
-enum ucode_state {
-       UCODE_OK        = 0,
-       UCODE_NEW,
-       UCODE_UPDATED,
-       UCODE_NFOUND,
-       UCODE_ERROR,
-};
-
-struct microcode_ops {
-       enum ucode_state (*request_microcode_fw) (int cpu, struct device *);
-
-       void (*microcode_fini_cpu) (int cpu);
-
-       /*
-        * The generic 'microcode_core' part guarantees that
-        * the callbacks below run on a target cpu when they
-        * are being called.
-        * See also the "Synchronization" section in microcode_core.c.
-        */
-       enum ucode_state (*apply_microcode) (int cpu);
-       int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
-};
-
 struct ucode_cpu_info {
        struct cpu_signature    cpu_sig;
        void                    *mc;
 };
-extern struct ucode_cpu_info ucode_cpu_info[];
-struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
-
-#ifdef CONFIG_MICROCODE_INTEL
-extern struct microcode_ops * __init init_intel_microcode(void);
-#else
-static inline struct microcode_ops * __init init_intel_microcode(void)
-{
-       return NULL;
-}
-#endif /* CONFIG_MICROCODE_INTEL */
 
-#ifdef CONFIG_MICROCODE_AMD
-extern struct microcode_ops * __init init_amd_microcode(void);
-extern void __exit exit_amd_microcode(void);
+#ifdef CONFIG_MICROCODE
+void load_ucode_bsp(void);
+void load_ucode_ap(void);
+void microcode_bsp_resume(void);
 #else
-static inline struct microcode_ops * __init init_amd_microcode(void)
-{
-       return NULL;
-}
-static inline void __exit exit_amd_microcode(void) {}
+static inline void load_ucode_bsp(void)        { }
+static inline void load_ucode_ap(void) { }
+static inline void microcode_bsp_resume(void) { }
 #endif
 
-#define MAX_UCODE_COUNT 128
+#ifdef CONFIG_CPU_SUP_INTEL
+/* Intel specific microcode defines. Public for IFS */
+struct microcode_header_intel {
+       unsigned int    hdrver;
+       unsigned int    rev;
+       unsigned int    date;
+       unsigned int    sig;
+       unsigned int    cksum;
+       unsigned int    ldrver;
+       unsigned int    pf;
+       unsigned int    datasize;
+       unsigned int    totalsize;
+       unsigned int    metasize;
+       unsigned int    reserved[2];
+};
 
-#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
-#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
-#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
-#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
-#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
-#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
-#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+struct microcode_intel {
+       struct microcode_header_intel   hdr;
+       unsigned int                    bits[];
+};
 
-#define CPUID_IS(a, b, c, ebx, ecx, edx)       \
-               (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c))))
+#define DEFAULT_UCODE_DATASIZE         (2000)
+#define MC_HEADER_SIZE                 (sizeof(struct microcode_header_intel))
+#define MC_HEADER_TYPE_MICROCODE       1
+#define MC_HEADER_TYPE_IFS             2
 
-/*
- * In early loading microcode phase on BSP, boot_cpu_data is not set up yet.
- * x86_cpuid_vendor() gets vendor id for BSP.
- *
- * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify
- * coding, we still use x86_cpuid_vendor() to get vendor id for AP.
- *
- * x86_cpuid_vendor() gets vendor information directly from CPUID.
- */
-static inline int x86_cpuid_vendor(void)
+static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr)
 {
-       u32 eax = 0x00000000;
-       u32 ebx, ecx = 0, edx;
-
-       native_cpuid(&eax, &ebx, &ecx, &edx);
-
-       if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
-               return X86_VENDOR_INTEL;
-
-       if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
-               return X86_VENDOR_AMD;
-
-       return X86_VENDOR_UNKNOWN;
+       return hdr->datasize ? : DEFAULT_UCODE_DATASIZE;
 }
 
-static inline unsigned int x86_cpuid_family(void)
+static inline u32 intel_get_microcode_revision(void)
 {
-       u32 eax = 0x00000001;
-       u32 ebx, ecx = 0, edx;
+       u32 rev, dummy;
+
+       native_wrmsrl(MSR_IA32_UCODE_REV, 0);
 
-       native_cpuid(&eax, &ebx, &ecx, &edx);
+       /* As documented in the SDM: Do a CPUID 1 here */
+       native_cpuid_eax(1);
 
-       return x86_family(eax);
+       /* get the current revision from MSR 0x8B */
+       native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
+
+       return rev;
 }
 
-#ifdef CONFIG_MICROCODE
-extern void __init load_ucode_bsp(void);
-extern void load_ucode_ap(void);
-void reload_early_microcode(unsigned int cpu);
-extern bool initrd_gone;
-void microcode_bsp_resume(void);
-#else
-static inline void __init load_ucode_bsp(void)                 { }
-static inline void load_ucode_ap(void)                         { }
-static inline void reload_early_microcode(unsigned int cpu)    { }
-static inline void microcode_bsp_resume(void)                  { }
-#endif
+void show_ucode_info_early(void);
+
+#else /* CONFIG_CPU_SUP_INTEL */
+static inline void show_ucode_info_early(void) { }
+#endif /* !CONFIG_CPU_SUP_INTEL */
 
 #endif /* _ASM_X86_MICROCODE_H */
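
intel_get_microcode_revision(), now public in this header, does the documented WRMSR(0)/CPUID(1)/RDMSR sequence and returns EDX, the high half of MSR_IA32_UCODE_REV (0x8b). A rough userspace equivalent via the msr driver (needs root and modprobe msr; userspace skips the priming WRMSR and relies on the revision already latched at microcode-load time):

	#include <stdio.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		uint64_t v;
		int fd = open("/dev/cpu/0/msr", O_RDONLY);

		/* MSR reads through the msr driver use the MSR as offset. */
		if (fd < 0 || pread(fd, &v, sizeof(v), 0x8b) != sizeof(v)) {
			perror("msr");
			return 1;
		}
		printf("microcode revision: 0x%x\n", (uint32_t)(v >> 32));
		return 0;
	}

The same value is reported as the "microcode" field in /proc/cpuinfo.
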
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
deleted file mode 100644 (file)
index e6662ad..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MICROCODE_AMD_H
-#define _ASM_X86_MICROCODE_AMD_H
-
-#include <asm/microcode.h>
-
-#define UCODE_MAGIC                    0x00414d44
-#define UCODE_EQUIV_CPU_TABLE_TYPE     0x00000000
-#define UCODE_UCODE_TYPE               0x00000001
-
-#define SECTION_HDR_SIZE               8
-#define CONTAINER_HDR_SZ               12
-
-struct equiv_cpu_entry {
-       u32     installed_cpu;
-       u32     fixed_errata_mask;
-       u32     fixed_errata_compare;
-       u16     equiv_cpu;
-       u16     res;
-} __attribute__((packed));
-
-struct microcode_header_amd {
-       u32     data_code;
-       u32     patch_id;
-       u16     mc_patch_data_id;
-       u8      mc_patch_data_len;
-       u8      init_flag;
-       u32     mc_patch_data_checksum;
-       u32     nb_dev_id;
-       u32     sb_dev_id;
-       u16     processor_rev_id;
-       u8      nb_rev_id;
-       u8      sb_rev_id;
-       u8      bios_api_rev;
-       u8      reserved1[3];
-       u32     match_reg[8];
-} __attribute__((packed));
-
-struct microcode_amd {
-       struct microcode_header_amd     hdr;
-       unsigned int                    mpb[];
-};
-
-#define PATCH_MAX_SIZE (3 * PAGE_SIZE)
-
-#ifdef CONFIG_MICROCODE_AMD
-extern void __init load_ucode_amd_bsp(unsigned int family);
-extern void load_ucode_amd_ap(unsigned int family);
-extern int __init save_microcode_in_initrd_amd(unsigned int family);
-void reload_ucode_amd(unsigned int cpu);
-#else
-static inline void __init load_ucode_amd_bsp(unsigned int family) {}
-static inline void load_ucode_amd_ap(unsigned int family) {}
-static inline int __init
-save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
-static inline void reload_ucode_amd(unsigned int cpu) {}
-#endif
-#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h
deleted file mode 100644 (file)
index f1fa979..0000000
+++ /dev/null
@@ -1,88 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_MICROCODE_INTEL_H
-#define _ASM_X86_MICROCODE_INTEL_H
-
-#include <asm/microcode.h>
-
-struct microcode_header_intel {
-       unsigned int            hdrver;
-       unsigned int            rev;
-       unsigned int            date;
-       unsigned int            sig;
-       unsigned int            cksum;
-       unsigned int            ldrver;
-       unsigned int            pf;
-       unsigned int            datasize;
-       unsigned int            totalsize;
-       unsigned int            metasize;
-       unsigned int            reserved[2];
-};
-
-struct microcode_intel {
-       struct microcode_header_intel hdr;
-       unsigned int            bits[];
-};
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-       unsigned int            sig;
-       unsigned int            pf;
-       unsigned int            cksum;
-};
-
-struct extended_sigtable {
-       unsigned int            count;
-       unsigned int            cksum;
-       unsigned int            reserved[3];
-       struct extended_signature sigs[];
-};
-
-#define DEFAULT_UCODE_DATASIZE (2000)
-#define MC_HEADER_SIZE         (sizeof(struct microcode_header_intel))
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
-#define EXT_HEADER_SIZE                (sizeof(struct extended_sigtable))
-#define EXT_SIGNATURE_SIZE     (sizeof(struct extended_signature))
-#define MC_HEADER_TYPE_MICROCODE       1
-#define MC_HEADER_TYPE_IFS             2
-
-#define get_totalsize(mc) \
-       (((struct microcode_intel *)mc)->hdr.datasize ? \
-        ((struct microcode_intel *)mc)->hdr.totalsize : \
-        DEFAULT_UCODE_TOTALSIZE)
-
-#define get_datasize(mc) \
-       (((struct microcode_intel *)mc)->hdr.datasize ? \
-        ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
-
-#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
-
-static inline u32 intel_get_microcode_revision(void)
-{
-       u32 rev, dummy;
-
-       native_wrmsrl(MSR_IA32_UCODE_REV, 0);
-
-       /* As documented in the SDM: Do a CPUID 1 here */
-       native_cpuid_eax(1);
-
-       /* get the current revision from MSR 0x8B */
-       native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev);
-
-       return rev;
-}
-
-#ifdef CONFIG_MICROCODE_INTEL
-extern void __init load_ucode_intel_bsp(void);
-extern void load_ucode_intel_ap(void);
-extern void show_ucode_info_early(void);
-extern int __init save_microcode_in_initrd_intel(void);
-void reload_ucode_intel(void);
-#else
-static inline __init void load_ucode_intel_bsp(void) {}
-static inline void load_ucode_intel_ap(void) {}
-static inline void show_ucode_info_early(void) {}
-static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; }
-static inline void reload_ucode_intel(void) {}
-#endif
-
-#endif /* _ASM_X86_MICROCODE_INTEL_H */
index 88d9ef9..fa83d88 100644 (file)
@@ -5,7 +5,7 @@
 #include <linux/types.h>
 #include <linux/nmi.h>
 #include <linux/msi.h>
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/nospec-branch.h>
 #include <asm/paravirt.h>
index 3aedae6..1d11135 100644 (file)
@@ -57,6 +57,7 @@
 
 #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB                  BIT(0)     /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_SBPB                  BIT(7)     /* Selective Branch Prediction Barrier */
 
 #define MSR_PPIN_CTL                   0x0000004e
 #define MSR_PPIN                       0x0000004f
                                                 * Not susceptible to Post-Barrier
                                                 * Return Stack Buffer Predictions.
                                                 */
+#define ARCH_CAP_GDS_CTRL              BIT(25) /*
+                                                * CPU is vulnerable to Gather
+                                                * Data Sampling (GDS) and
+                                                * has controls for mitigation.
+                                                */
+#define ARCH_CAP_GDS_NO                        BIT(26) /*
+                                                * CPU is not vulnerable to Gather
+                                                * Data Sampling (GDS).
+                                                */
 
 #define ARCH_CAP_XAPIC_DISABLE         BIT(21) /*
                                                 * IA32_XAPIC_DISABLE_STATUS MSR
 #define RNGDS_MITG_DIS                 BIT(0)  /* SRBDS support */
 #define RTM_ALLOW                      BIT(1)  /* TSX development mode */
 #define FB_CLEAR_DIS                   BIT(3)  /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS                   BIT(4)  /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED                        BIT(5)  /* GDS mitigation locked */
 
 #define MSR_IA32_SYSENTER_CS           0x00000174
 #define MSR_IA32_SYSENTER_ESP          0x00000175
 #define MSR_AMD64_DE_CFG               0xc0011029
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT   1
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE      BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
 
 #define MSR_AMD64_BU_CFG2              0xc001102a
 #define MSR_AMD64_IBSFETCHCTL          0xc0011030
index 55388c9..c55cc24 100644 (file)
  * eventually turn into its own annotation.
  */
 .macro VALIDATE_UNRET_END
-#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY)
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+       (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
        ANNOTATE_RETPOLINE_SAFE
        nop
 #endif
  * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
  * indirect jmp/call which may be susceptible to the Spectre variant 2
  * attack.
+ *
+ * NOTE: these do not take kCFI into account and are thus not comparable to C
+ * indirect calls, take care when using. The target of these should be an ENDBR
+ * instruction irrespective of kCFI.
  */
 .macro JMP_NOSPEC reg:req
 #ifdef CONFIG_RETPOLINE
 .endm
 
 #ifdef CONFIG_CPU_UNRET_ENTRY
-#define CALL_ZEN_UNTRAIN_RET   "call zen_untrain_ret"
+#define CALL_UNTRAIN_RET       "call entry_untrain_ret"
 #else
-#define CALL_ZEN_UNTRAIN_RET   ""
+#define CALL_UNTRAIN_RET       ""
 #endif
 
 /*
  * return thunk isn't mapped into the userspace tables (then again, AMD
  * typically has NO_MELTDOWN).
  *
- * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
  * entry_ibpb() will clobber AX, CX, DX.
  *
  * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
  */
 .macro UNTRAIN_RET
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-       defined(CONFIG_CALL_DEPTH_TRACKING)
+       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
        VALIDATE_UNRET_END
        ALTERNATIVE_3 "",                                               \
-                     CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
+                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
                      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
                      __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
 #endif
 .endm
 
+.macro UNTRAIN_RET_VM
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+       defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+       VALIDATE_UNRET_END
+       ALTERNATIVE_3 "",                                               \
+                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
+                     "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT,    \
+                     __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
 .macro UNTRAIN_RET_FROM_CALL
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
        defined(CONFIG_CALL_DEPTH_TRACKING)
        VALIDATE_UNRET_END
        ALTERNATIVE_3 "",                                               \
-                     CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,          \
+                     CALL_UNTRAIN_RET, X86_FEATURE_UNRET,              \
                      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,        \
                      __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
 #endif
@@ -326,15 +342,24 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
 extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
 extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
 
+#ifdef CONFIG_RETHUNK
 extern void __x86_return_thunk(void);
-extern void zen_untrain_ret(void);
+#else
+static inline void __x86_return_thunk(void) {}
+#endif
+
+extern void retbleed_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+
+extern void retbleed_untrain_ret(void);
+extern void srso_untrain_ret(void);
+extern void srso_alias_untrain_ret(void);
+
+extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
-#ifdef CONFIG_CALL_THUNKS
 extern void (*x86_return_thunk)(void);
-#else
-#define x86_return_thunk       (&__x86_return_thunk)
-#endif
 
 #ifdef CONFIG_CALL_DEPTH_TRACKING
 extern void __x86_return_skl(void);
@@ -461,9 +486,6 @@ enum ssb_mitigation {
        SPEC_STORE_BYPASS_SECCOMP,
 };
 
-extern char __indirect_thunk_start[];
-extern char __indirect_thunk_end[];
-
 static __always_inline
 void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
 {
@@ -475,11 +497,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
                : "memory");
 }
 
+extern u64 x86_pred_cmd;
+
 static inline void indirect_branch_prediction_barrier(void)
 {
-       u64 val = PRED_CMD_IBPB;
-
-       alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+       alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
 }
 
 /* The Intel SPEC CTRL MSR base value cache */
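
With this change indirect_branch_prediction_barrier() writes whatever command x86_pred_cmd currently holds rather than a hard-coded PRED_CMD_IBPB, so the SRSO code can retarget every barrier site to the cheaper selective barrier in one place. A sketch of the two MSR_IA32_PRED_CMD (0x49) command values involved, per the defines added earlier in this merge (BIT() expanded by hand; the switch shown is illustrative, not the kernel's policy code):

	#include <stdio.h>
	#include <inttypes.h>

	#define PRED_CMD_IBPB	(UINT64_C(1) << 0)	/* full barrier */
	#define PRED_CMD_SBPB	(UINT64_C(1) << 7)	/* selective (AMD) */

	int main(void)
	{
		/* x86_pred_cmd defaults to IBPB; SRSO setup may retarget. */
		uint64_t x86_pred_cmd = PRED_CMD_IBPB;

		printf("wrmsr(0x49, %#" PRIx64 ")\n", x86_pred_cmd);

		x86_pred_cmd = PRED_CMD_SBPB;	/* hypothetical switch */
		printf("wrmsr(0x49, %#" PRIx64 ")\n", x86_pred_cmd);
		return 0;
	}
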
index b497786..6c8ff12 100644 (file)
@@ -739,6 +739,7 @@ static __always_inline unsigned long arch_local_irq_save(void)
             ".popsection")
 
 extern void default_banner(void);
+void native_pv_lock_init(void) __init;
 
 #else  /* __ASSEMBLY__ */
 
@@ -778,6 +779,12 @@ extern void default_banner(void);
 #endif /* __ASSEMBLY__ */
 #else  /* CONFIG_PARAVIRT */
 # define default_banner x86_init_noop
+
+#ifndef __ASSEMBLY__
+static inline void native_pv_lock_init(void)
+{
+}
+#endif
 #endif /* !CONFIG_PARAVIRT */
 
 #ifndef __ASSEMBLY__
index d46300e..861e53e 100644 (file)
@@ -586,7 +586,6 @@ extern char                 ignore_fpu_irq;
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT 1
 #define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
 # define BASE_PREFETCH         ""
@@ -620,11 +619,6 @@ static __always_inline void prefetchw(const void *x)
                          "m" (*(const char *)x));
 }
 
-static inline void spin_lock_prefetch(const void *x)
-{
-       prefetchw(x);
-}
-
 #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
                           TOP_OF_KERNEL_STACK_PADDING)
 
@@ -682,9 +676,15 @@ extern u16 get_llc_id(unsigned int cpu);
 #ifdef CONFIG_CPU_SUP_AMD
 extern u32 amd_get_nodes_per_socket(void);
 extern u32 amd_get_highest_perf(void);
+extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
+extern void amd_check_microcode(void);
 #else
 static inline u32 amd_get_nodes_per_socket(void)       { return 0; }
 static inline u32 amd_get_highest_perf(void)           { return 0; }
+static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
+static inline void amd_clear_divider(void)             { }
+static inline void amd_check_microcode(void)           { }
 #endif
 
 extern unsigned long arch_align_stack(unsigned long sp);
@@ -727,4 +727,6 @@ bool arch_is_platform_page(u64 paddr);
 #define arch_is_platform_page arch_is_platform_page
 #endif
 
+extern bool gds_ucode_mitigated(void);
+
 #endif /* _ASM_X86_PROCESSOR_H */
index d87451d..cde8357 100644 (file)
@@ -74,8 +74,6 @@ static inline bool vcpu_is_preempted(long cpu)
  */
 DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
 
-void native_pv_lock_init(void) __init;
-
 /*
  * Shortcut for the queued_spin_lock_slowpath() function that allows
  * virt to hijack it.
@@ -103,10 +101,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
 
        return true;
 }
-#else
-static inline void native_pv_lock_init(void)
-{
-}
+
 #endif /* CONFIG_PARAVIRT */
 
 #include <asm-generic/qspinlock.h>
index 42b17cf..85b6e36 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <asm/ibt.h>
 
+void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked);
+
 /*
  * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit
  * registers. For i386, however, only 1 32-bit register needs to be saved
index 794f696..9d6411c 100644 (file)
@@ -56,7 +56,7 @@
 
 #define GDT_ENTRY_INVALID_SEG  0
 
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64)
 /*
  * The layout of the per-CPU GDT under Linux:
  *
index 66c8067..5b4a1ce 100644 (file)
@@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void)
                __sev_es_nmi_complete();
 }
 extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern void sev_enable(struct boot_params *bp);
 
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
 {
@@ -210,12 +211,15 @@ bool snp_init(struct boot_params *bp);
 void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
 void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+u64 snp_get_unsupported_features(u64 status);
+u64 sev_get_status(void);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline void sev_enable(struct boot_params *bp) { }
 static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
 static inline void setup_ghcb(void) { }
@@ -235,6 +239,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
 }
 
 static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
+static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
+static inline u64 sev_get_status(void) { return 0; }
 #endif
 
 #endif
index 5c91305..f42dbf1 100644 (file)
@@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
 __visible struct task_struct *__switch_to(struct task_struct *prev,
                                          struct task_struct *next);
 
-asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_fork_asm(void);
+__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+                            int (*fn)(void *), void *fn_arg);
 
 /*
  * This is the structure pointed to by thread.sp for an inactive task.  The
index caf41c4..3235ba1 100644 (file)
@@ -136,10 +136,11 @@ static inline int topology_max_smt_threads(void)
        return __max_smt_threads;
 }
 
+#include <linux/cpu_smt.h>
+
 int topology_update_package_map(unsigned int apicid, unsigned int cpu);
 int topology_update_die_map(unsigned int dieid, unsigned int cpu);
 int topology_phys_to_logical_pkg(unsigned int pkg);
-bool topology_smt_supported(void);
 
 extern struct cpumask __cpu_primary_thread_mask;
 #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask)
@@ -162,7 +163,6 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
 static inline int topology_max_die_per_package(void) { return 1; }
 static inline int topology_max_smt_threads(void) { return 1; }
 static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
-static inline bool topology_smt_supported(void) { return false; }
 #endif /* !CONFIG_SMP */
 
 static inline void arch_fix_phys_package_id(int num, u32 slot)
index 1b6455f..6989b82 100644 (file)
@@ -10,6 +10,7 @@
  * Copyright (c) Russ Anderson <rja@sgi.com>
  */
 
+#include <linux/efi.h>
 #include <linux/rtc.h>
 
 /*
@@ -115,7 +116,8 @@ struct uv_arch_type_entry {
 struct uv_systab {
        char signature[4];      /* must be UV_SYSTAB_SIG */
        u32 revision;           /* distinguish different firmware revs */
-       u64 function;           /* BIOS runtime callback function ptr */
+       u64 (__efiapi *function)(enum uv_bios_cmd, ...);
+                               /* BIOS runtime callback function ptr */
        u32 size;               /* systab size (starting with _VERSION_UV4) */
        struct {
                u32 type:8;     /* type of entry */
index fa9ec20..85e63d5 100644 (file)
@@ -295,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn)
 
 /* VIRT <-> MACHINE conversion */
 #define virt_to_machine(v)     (phys_to_machine(XPADDR(__pa(v))))
-#define virt_to_pfn(v)          (PFN_DOWN(__pa(v)))
+static inline unsigned long virt_to_pfn(const void *v)
+{
+       return PFN_DOWN(__pa(v));
+}
 #define virt_to_mfn(v)         (pfn_to_mfn(virt_to_pfn(v)))
 #define mfn_to_virt(m)         (__va(mfn_to_pfn(m) << PAGE_SHIFT))
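The macro-to-inline conversion above buys compile-time type checking: the old macro accepted any expression, while the inline form rejects non-pointer arguments. A minimal user-space sketch of the same idea, with a stand-in __pa() (the real one needs the kernel's physical mapping):

    /* Illustrative user-space sketch, not kernel code: a static inline
     * gives virt_to_pfn() a typed prototype, so passing a non-pointer is
     * caught at compile time; the old macro silently accepted anything. */
    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define __pa(v)     ((uintptr_t)(v))   /* stand-in for the demo */
    #define PFN_DOWN(x) ((x) >> PAGE_SHIFT)

    static inline unsigned long virt_to_pfn(const void *v)
    {
            return PFN_DOWN(__pa(v));
    }

    int main(void)
    {
            int x;
            printf("pfn of &x: %lu\n", virt_to_pfn(&x));
            /* virt_to_pfn(42); would now warn: int passed as const void * */
            return 0;
    }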
 
index 21b542a..53369c5 100644 (file)
@@ -52,6 +52,7 @@ int acpi_lapic;
 int acpi_ioapic;
 int acpi_strict;
 int acpi_disable_cmcff;
+bool acpi_int_src_ovr[NR_IRQS_LEGACY];
 
 /* ACPI SCI override configuration */
 u8 acpi_sci_flags __initdata;
@@ -588,6 +589,9 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
 
        acpi_table_print_madt_entry(&header->common);
 
+       if (intsrc->source_irq < NR_IRQS_LEGACY)
+               acpi_int_src_ovr[intsrc->source_irq] = true;
+
        if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
                acpi_sci_ioapic_setup(intsrc->source_irq,
                                      intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
index 72646d7..a5ead6a 100644 (file)
@@ -687,10 +687,6 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 
 #ifdef CONFIG_RETHUNK
 
-#ifdef CONFIG_CALL_THUNKS
-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
-#endif
-
 /*
  * Rewrite the compiler generated return thunk tail-calls.
  *
@@ -778,6 +774,8 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
 
 #ifdef CONFIG_X86_KERNEL_IBT
 
+static void poison_cfi(void *addr);
+
 static void __init_or_module poison_endbr(void *addr, bool warn)
 {
        u32 endbr, poison = gen_endbr_poison();
@@ -802,8 +800,11 @@ static void __init_or_module poison_endbr(void *addr, bool warn)
 
 /*
  * Generated by: objtool --ibt
+ *
+ * Seal the functions for indirect calls by clobbering the ENDBR instructions
+ * and the kCFI hash value.
  */
-void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
+void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
 {
        s32 *s;
 
@@ -812,13 +813,13 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
 
                poison_endbr(addr, true);
                if (IS_ENABLED(CONFIG_FINEIBT))
-                       poison_endbr(addr - 16, false);
+                       poison_cfi(addr - 16);
        }
 }
 
 #else
 
-void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }
+void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
 
 #endif /* CONFIG_X86_KERNEL_IBT */
 
@@ -1063,6 +1064,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
        return 0;
 }
 
+static void cfi_rewrite_endbr(s32 *start, s32 *end)
+{
+       s32 *s;
+
+       for (s = start; s < end; s++) {
+               void *addr = (void *)s + *s;
+
+               poison_endbr(addr+16, false);
+       }
+}
+
 /* .retpoline_sites */
 static int cfi_rand_callers(s32 *start, s32 *end)
 {
@@ -1157,14 +1169,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
                return;
 
        case CFI_FINEIBT:
+               /* place the FineIBT preamble at func()-16 */
                ret = cfi_rewrite_preamble(start_cfi, end_cfi);
                if (ret)
                        goto err;
 
+               /* rewrite the callers to target func()-16 */
                ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
                if (ret)
                        goto err;
 
+               /* now that nobody targets func()+0, remove ENDBR there */
+               cfi_rewrite_endbr(start_cfi, end_cfi);
+
                if (builtin)
                        pr_info("Using FineIBT CFI\n");
                return;
@@ -1177,6 +1194,41 @@ err:
        pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
 }
 
+static inline void poison_hash(void *addr)
+{
+       *(u32 *)addr = 0;
+}
+
+static void poison_cfi(void *addr)
+{
+       switch (cfi_mode) {
+       case CFI_FINEIBT:
+               /*
+                * __cfi_\func:
+                *      osp nopl (%rax)
+                *      subl    $0, %r10d
+                *      jz      1f
+                *      ud2
+                * 1:   nop
+                */
+               poison_endbr(addr, false);
+               poison_hash(addr + fineibt_preamble_hash);
+               break;
+
+       case CFI_KCFI:
+               /*
+                * __cfi_\func:
+                *      movl    $0, %eax
+                *      .skip   11, 0x90
+                */
+               poison_hash(addr + 1);
+               break;
+
+       default:
+               break;
+       }
+}
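The *_sites tables walked here hold 32-bit self-relative entries: the patch target is recovered as (void *)s + *s, and in the kCFI case poisoning zeroes the u32 hash one byte past the movl opcode. A hedged user-space simulation of that walk (buffer layout and values invented for illustration):

    /* Simulates the self-relative offset walk: each s32 entry stores
     * (target - &entry). poison_hash() clobbers the 4-byte kCFI hash
     * that follows the 0xb8 (movl $imm32, %eax) opcode byte. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static unsigned char text[64];      /* pretend .text */
    static int32_t sites[1];            /* pretend .cfi_sites */

    static void poison_hash(void *addr)
    {
            memset(addr, 0, sizeof(uint32_t));
    }

    int main(void)
    {
            unsigned char *target = &text[16];

            text[16] = 0xb8;                            /* movl opcode */
            memcpy(&text[17], "\x78\x56\x34\x12", 4);   /* fake hash */

            sites[0] = (int32_t)((uintptr_t)target - (uintptr_t)&sites[0]);

            for (int32_t *s = sites; s < sites + 1; s++) {
                    unsigned char *addr = (unsigned char *)s + *s;
                    poison_hash(addr + 1);              /* kCFI: hash at +1 */
            }
            printf("hash bytes now: %02x %02x %02x %02x\n",
                   text[17], text[18], text[19], text[20]);
            return 0;
    }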
+
 #else
 
 static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
@@ -1184,6 +1236,10 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
 {
 }
 
+#ifdef CONFIG_X86_KERNEL_IBT
+static void poison_cfi(void *addr) { }
+#endif
+
 #endif
 
 void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
@@ -1471,6 +1527,7 @@ static noinline void __init int3_selftest(void)
 
 static __initdata int __alt_reloc_selftest_addr;
 
+extern void __init __alt_reloc_selftest(void *arg);
 __visible noinline void __init __alt_reloc_selftest(void *arg)
 {
        WARN_ON(arg != &__alt_reloc_selftest_addr);
@@ -1565,7 +1622,10 @@ void __init alternative_instructions(void)
         */
        callthunks_patch_builtin_calls();
 
-       apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
+       /*
+        * Seal all functions that do not have their address taken.
+        */
+       apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
 
 #ifdef CONFIG_SMP
        /* Patch to UP if other cpus not imminent. */
index 035a3db..356de95 100644 (file)
@@ -24,6 +24,8 @@
 #define PCI_DEVICE_ID_AMD_19H_M40H_ROOT                0x14b5
 #define PCI_DEVICE_ID_AMD_19H_M60H_ROOT                0x14d8
 #define PCI_DEVICE_ID_AMD_19H_M70H_ROOT                0x14e8
+#define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT                0x153a
+#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT                0x1507
 #define PCI_DEVICE_ID_AMD_MI200_ROOT           0x14bb
 
 #define PCI_DEVICE_ID_AMD_17H_DF_F4            0x1464
@@ -39,6 +41,7 @@
 #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4       0x14e4
 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4       0x14f4
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4       0x12fc
+#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4       0x12c4
 #define PCI_DEVICE_ID_AMD_MI200_DF_F4          0x14d4
 
 /* Protect the PCI config register pairs used for SMN. */
@@ -56,6 +59,8 @@ static const struct pci_device_id amd_root_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) },
        {}
 };
@@ -85,6 +90,8 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) },
        {}
 };
@@ -106,6 +113,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) },
        {}
 };
index 2a6509e..9bfd6e3 100644 (file)
@@ -301,6 +301,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
        local_irq_restore(flags);
 }
 
+#ifdef CONFIG_SMP
 /* must come after the send_IPI functions above for inlining */
 static int convert_apicid_to_cpu(int apic_id)
 {
@@ -329,3 +330,4 @@ int safe_smp_processor_id(void)
        return cpuid >= 0 ? cpuid : 0;
 }
 #endif
+#endif
index d9384d5..b524dee 100644 (file)
@@ -294,8 +294,7 @@ static void __init early_get_apic_socketid_shift(void)
 
 static void __init uv_stringify(int len, char *to, char *from)
 {
-       /* Relies on 'to' being NULL chars so result will be NULL terminated */
-       strncpy(to, from, len-1);
+       strscpy(to, from, len);
 
        /* Trim trailing spaces */
        (void)strim(to);
@@ -1013,7 +1012,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index,
 
        /* One (UV2) mapping */
        if (index == UV2_MMIOH) {
-               strncpy(id, "MMIOH", sizeof(id));
+               strscpy(id, "MMIOH", sizeof(id));
                max_io = max_pnode;
                mapped = 0;
                goto map_exit;
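Unlike strncpy(), strscpy() always NUL-terminates the destination and reports truncation instead of silently producing an unterminated buffer. A rough user-space model of its contract (the kernel implementation differs; -7 stands in for -E2BIG):

    #include <stdio.h>
    #include <string.h>

    /* Minimal model of strscpy() semantics: copy up to size-1 bytes,
     * always NUL-terminate, return copied length or -7 on truncation. */
    static long my_strscpy(char *dst, const char *src, size_t size)
    {
            size_t len;

            if (size == 0)
                    return -7;                  /* -E2BIG */
            len = strnlen(src, size);
            if (len == size) {                  /* would truncate */
                    memcpy(dst, src, size - 1);
                    dst[size - 1] = '\0';
                    return -7;
            }
            memcpy(dst, src, len + 1);
            return (long)len;
    }

    int main(void)
    {
            char id[6];
            printf("%ld '%s'\n", my_strscpy(id, "MMIOH", sizeof(id)), id);       /* 5 'MMIOH' */
            printf("%ld '%s'\n", my_strscpy(id, "MMIOH-EXTRA", sizeof(id)), id); /* -7 'MMIOH' */
            return 0;
    }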
index 571abf8..7eca6a8 100644 (file)
 
 #include "cpu.h"
 
-static const int amd_erratum_383[];
-static const int amd_erratum_400[];
-static const int amd_erratum_1054[];
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-
 /*
  * nodes_per_socket: Stores the number of nodes per socket.
  * Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
@@ -39,6 +34,83 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
  */
 static u32 nodes_per_socket = 1;
 
+/*
+ * AMD errata checking
+ *
+ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+ * have an OSVW id assigned, which it takes as first argument. Both take a
+ * variable number of family-specific model-stepping ranges created by
+ * AMD_MODEL_RANGE().
+ *
+ * Example:
+ *
+ * const int amd_erratum_319[] =
+ *     AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+ *                        AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+ *                        AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+ */
+
+#define AMD_LEGACY_ERRATUM(...)                { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+       ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range)  (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range)   (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range)     ((range) & 0xfff)
+
+static const int amd_erratum_400[] =
+       AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+                           AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+
+static const int amd_erratum_383[] =
+       AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
+static const int amd_zenbleed[] =
+       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
+
+static const int amd_div0[] =
+       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+                          AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+{
+       int osvw_id = *erratum++;
+       u32 range;
+       u32 ms;
+
+       if (osvw_id >= 0 && osvw_id < 65536 &&
+           cpu_has(cpu, X86_FEATURE_OSVW)) {
+               u64 osvw_len;
+
+               rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+               if (osvw_id < osvw_len) {
+                       u64 osvw_bits;
+
+                       rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+                           osvw_bits);
+                       return osvw_bits & (1ULL << (osvw_id & 0x3f));
+               }
+       }
+
+       /* OSVW unavailable or ID unknown, match family-model-stepping range */
+       ms = (cpu->x86_model << 4) | cpu->x86_stepping;
+       while ((range = *erratum++))
+               if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+                   (ms >= AMD_MODEL_RANGE_START(range)) &&
+                   (ms <= AMD_MODEL_RANGE_END(range)))
+                       return true;
+
+       return false;
+}
+
 static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
 {
        u32 gprs[8] = { 0 };
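For concreteness, a standalone decode of one of the new Zenbleed ranges, using the same macros as above. The family sits in bits 31-24 and the model-stepping window in bits 23-12 and 11-0; the test CPU values are arbitrary inputs:

    #include <stdio.h>

    #define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
            ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
    #define AMD_MODEL_RANGE_FAMILY(range)   (((range) >> 24) & 0xff)
    #define AMD_MODEL_RANGE_START(range)    (((range) >> 12) & 0xfff)
    #define AMD_MODEL_RANGE_END(range)      ((range) & 0xfff)

    int main(void)
    {
            /* first amd_zenbleed entry: family 0x17, models 0x30-0x4f */
            unsigned int range = AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf);
            unsigned int ms = (0x31 << 4) | 0x0;    /* model 0x31, stepping 0 */

            printf("family %#x, ms window %#x..%#x, cpu ms %#x -> %s\n",
                   AMD_MODEL_RANGE_FAMILY(range),
                   AMD_MODEL_RANGE_START(range),    /* 0x300 */
                   AMD_MODEL_RANGE_END(range),      /* 0x4ff */
                   ms,
                   (ms >= AMD_MODEL_RANGE_START(range) &&
                    ms <= AMD_MODEL_RANGE_END(range)) ? "match" : "no match");
            return 0;
    }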
@@ -916,6 +988,47 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
        }
 }
 
+static bool cpu_has_zenbleed_microcode(void)
+{
+       u32 good_rev = 0;
+
+       switch (boot_cpu_data.x86_model) {
+       case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
+       case 0x60 ... 0x67: good_rev = 0x0860010b; break;
+       case 0x68 ... 0x6f: good_rev = 0x08608105; break;
+       case 0x70 ... 0x7f: good_rev = 0x08701032; break;
+       case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+
+       default:
+               return false;
+       }
+
+       if (boot_cpu_data.microcode < good_rev)
+               return false;
+
+       return true;
+}
+
+static void zenbleed_check(struct cpuinfo_x86 *c)
+{
+       if (!cpu_has_amd_erratum(c, amd_zenbleed))
+               return;
+
+       if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+               return;
+
+       if (!cpu_has(c, X86_FEATURE_AVX))
+               return;
+
+       if (!cpu_has_zenbleed_microcode()) {
+               pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
+               msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+       } else {
+               msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+       }
+}
+
 static void init_amd(struct cpuinfo_x86 *c)
 {
        early_init_amd(c);
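The fallback logic above prefers microcode that carries the real fix and only sets the DE_CFG chicken bit when the revision is too old. A hedged user-space mirror of cpu_has_zenbleed_microcode(), reusing the revision table verbatim (GCC case ranges, as in the hunk; the sample revisions in main() are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* Returns whether a Zen2 model/microcode revision pair already
     * contains the Zenbleed fix, per the table in the hunk above. */
    static bool zen2_rev_is_fixed(unsigned int model, unsigned int rev)
    {
            unsigned int good_rev;

            switch (model) {
            case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
            case 0x60 ... 0x67: good_rev = 0x0860010b; break;
            case 0x68 ... 0x6f: good_rev = 0x08608105; break;
            case 0x70 ... 0x7f: good_rev = 0x08701032; break;
            case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
            default:            return false;
            }
            return rev >= good_rev;
    }

    int main(void)
    {
            printf("%d\n", zen2_rev_is_fixed(0x71, 0x08701034));    /* 1: fixed */
            printf("%d\n", zen2_rev_is_fixed(0x71, 0x08701030));    /* 0: needs chicken bit */
            return 0;
    }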
@@ -1020,6 +1133,13 @@ static void init_amd(struct cpuinfo_x86 *c)
        if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
            cpu_has(c, X86_FEATURE_AUTOIBRS))
                WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+
+       zenbleed_check(c);
+
+       if (cpu_has_amd_erratum(c, amd_div0)) {
+               pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+               setup_force_cpu_bug(X86_BUG_DIV0);
+       }
 }
 
 #ifdef CONFIG_X86_32
@@ -1115,73 +1235,6 @@ static const struct cpu_dev amd_cpu_dev = {
 
 cpu_dev_register(amd_cpu_dev);
 
-/*
- * AMD errata checking
- *
- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
- * have an OSVW id assigned, which it takes as first argument. Both take a
- * variable number of family-specific model-stepping ranges created by
- * AMD_MODEL_RANGE().
- *
- * Example:
- *
- * const int amd_erratum_319[] =
- *     AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
- *                        AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
- *                        AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
- */
-
-#define AMD_LEGACY_ERRATUM(...)                { -1, __VA_ARGS__, 0 }
-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
-       ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
-#define AMD_MODEL_RANGE_FAMILY(range)  (((range) >> 24) & 0xff)
-#define AMD_MODEL_RANGE_START(range)   (((range) >> 12) & 0xfff)
-#define AMD_MODEL_RANGE_END(range)     ((range) & 0xfff)
-
-static const int amd_erratum_400[] =
-       AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
-                           AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
-
-static const int amd_erratum_383[] =
-       AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-
-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
-       AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
-{
-       int osvw_id = *erratum++;
-       u32 range;
-       u32 ms;
-
-       if (osvw_id >= 0 && osvw_id < 65536 &&
-           cpu_has(cpu, X86_FEATURE_OSVW)) {
-               u64 osvw_len;
-
-               rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
-               if (osvw_id < osvw_len) {
-                       u64 osvw_bits;
-
-                       rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
-                           osvw_bits);
-                       return osvw_bits & (1ULL << (osvw_id & 0x3f));
-               }
-       }
-
-       /* OSVW unavailable or ID unknown, match family-model-stepping range */
-       ms = (cpu->x86_model << 4) | cpu->x86_stepping;
-       while ((range = *erratum++))
-               if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
-                   (ms >= AMD_MODEL_RANGE_START(range)) &&
-                   (ms <= AMD_MODEL_RANGE_END(range)))
-                       return true;
-
-       return false;
-}
-
 static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask);
 
 static unsigned int amd_msr_dr_addr_masks[] = {
@@ -1235,3 +1288,45 @@ u32 amd_get_highest_perf(void)
        return 255;
 }
 EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+
+static void zenbleed_check_cpu(void *unused)
+{
+       struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+       zenbleed_check(c);
+}
+
+void amd_check_microcode(void)
+{
+       on_each_cpu(zenbleed_check_cpu, NULL, 1);
+}
+
+bool cpu_has_ibpb_brtype_microcode(void)
+{
+       switch (boot_cpu_data.x86) {
+       /* Zen1/2 IBPB flushes branch type predictions too. */
+       case 0x17:
+               return boot_cpu_has(X86_FEATURE_AMD_IBPB);
+       case 0x19:
+               /* Poke the MSR bit on Zen3/4 to check its presence. */
+               if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
+                       setup_force_cpu_cap(X86_FEATURE_SBPB);
+                       return true;
+               } else {
+                       return false;
+               }
+       default:
+               return false;
+       }
+}
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+void noinstr amd_clear_divider(void)
+{
+       asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+                    :: "a" (0), "d" (0), "r" (1));
+}
+EXPORT_SYMBOL_GPL(amd_clear_divider);
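The dummy division is harmless by construction: with EDX:EAX = 0 and a divisor of 1, div produces quotient 0 and remainder 0 and cannot fault, so it only flushes the divider's internal state. A standalone sketch of the same instruction with explicit operand tying (GCC/Clang inline asm, user-space illustration only):

    #include <stdio.h>

    int main(void)
    {
            unsigned int q = 0, r = 0;

            /* div %4 divides EDX:EAX by operand 4; "0"/"1" tie the zero
             * inputs to the EAX/EDX outputs. */
            asm volatile("div %4"
                         : "=a" (q), "=d" (r)
                         : "0" (0U), "1" (0U), "r" (1U));
            printf("q=%u r=%u\n", q, r);    /* q=0 r=0 */
            return 0;
    }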
index 9e2a918..f081d26 100644 (file)
@@ -47,6 +47,8 @@ static void __init taa_select_mitigation(void);
 static void __init mmio_select_mitigation(void);
 static void __init srbds_select_mitigation(void);
 static void __init l1d_flush_select_mitigation(void);
+static void __init srso_select_mitigation(void);
+static void __init gds_select_mitigation(void);
 
 /* The base value of the SPEC_CTRL MSR without task-specific bits set */
 u64 x86_spec_ctrl_base;
@@ -56,8 +58,13 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
 DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
 EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
 
+u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
+EXPORT_SYMBOL_GPL(x86_pred_cmd);
+
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
+void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+
 /* Update SPEC_CTRL MSR and its cached copy unconditionally */
 static void update_spec_ctrl(u64 val)
 {
@@ -160,6 +167,13 @@ void __init cpu_select_mitigations(void)
        md_clear_select_mitigation();
        srbds_select_mitigation();
        l1d_flush_select_mitigation();
+
+       /*
+        * srso_select_mitigation() depends on and must run after
+        * retbleed_select_mitigation().
+        */
+       srso_select_mitigation();
+       gds_select_mitigation();
 }
 
 /*
@@ -646,6 +660,149 @@ static int __init l1d_flush_parse_cmdline(char *str)
 early_param("l1d_flush", l1d_flush_parse_cmdline);
 
 #undef pr_fmt
+#define pr_fmt(fmt)    "GDS: " fmt
+
+enum gds_mitigations {
+       GDS_MITIGATION_OFF,
+       GDS_MITIGATION_UCODE_NEEDED,
+       GDS_MITIGATION_FORCE,
+       GDS_MITIGATION_FULL,
+       GDS_MITIGATION_FULL_LOCKED,
+       GDS_MITIGATION_HYPERVISOR,
+};
+
+#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
+#else
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
+#endif
+
+static const char * const gds_strings[] = {
+       [GDS_MITIGATION_OFF]            = "Vulnerable",
+       [GDS_MITIGATION_UCODE_NEEDED]   = "Vulnerable: No microcode",
+       [GDS_MITIGATION_FORCE]          = "Mitigation: AVX disabled, no microcode",
+       [GDS_MITIGATION_FULL]           = "Mitigation: Microcode",
+       [GDS_MITIGATION_FULL_LOCKED]    = "Mitigation: Microcode (locked)",
+       [GDS_MITIGATION_HYPERVISOR]     = "Unknown: Dependent on hypervisor status",
+};
+
+bool gds_ucode_mitigated(void)
+{
+       return (gds_mitigation == GDS_MITIGATION_FULL ||
+               gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
+}
+EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
+
+void update_gds_msr(void)
+{
+       u64 mcu_ctrl_after;
+       u64 mcu_ctrl;
+
+       switch (gds_mitigation) {
+       case GDS_MITIGATION_OFF:
+               rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+               mcu_ctrl |= GDS_MITG_DIS;
+               break;
+       case GDS_MITIGATION_FULL_LOCKED:
+               /*
+                * The LOCKED state comes from the boot CPU. APs might not have
+                * the same state. Make sure the mitigation is enabled on all
+                * CPUs.
+                */
+       case GDS_MITIGATION_FULL:
+               rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+               mcu_ctrl &= ~GDS_MITG_DIS;
+               break;
+       case GDS_MITIGATION_FORCE:
+       case GDS_MITIGATION_UCODE_NEEDED:
+       case GDS_MITIGATION_HYPERVISOR:
+               return;
+       };
+
+       wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+       /*
+        * Check to make sure that the WRMSR value was not ignored. Writes to
+        * GDS_MITG_DIS will be ignored if this processor is locked but the boot
+        * processor was not.
+        */
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+       WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
+}
+
+static void __init gds_select_mitigation(void)
+{
+       u64 mcu_ctrl;
+
+       if (!boot_cpu_has_bug(X86_BUG_GDS))
+               return;
+
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+               gds_mitigation = GDS_MITIGATION_HYPERVISOR;
+               goto out;
+       }
+
+       if (cpu_mitigations_off())
+               gds_mitigation = GDS_MITIGATION_OFF;
+       /* Will verify below that mitigation _can_ be disabled */
+
+       /* No microcode */
+       if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+               if (gds_mitigation == GDS_MITIGATION_FORCE) {
+                       /*
+                        * This only needs to be done on the boot CPU so do it
+                        * here rather than in update_gds_msr()
+                        */
+                       setup_clear_cpu_cap(X86_FEATURE_AVX);
+                       pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+               } else {
+                       gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
+               }
+               goto out;
+       }
+
+       /* Microcode has mitigation, use it */
+       if (gds_mitigation == GDS_MITIGATION_FORCE)
+               gds_mitigation = GDS_MITIGATION_FULL;
+
+       rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+       if (mcu_ctrl & GDS_MITG_LOCKED) {
+               if (gds_mitigation == GDS_MITIGATION_OFF)
+                       pr_warn("Mitigation locked. Disable failed.\n");
+
+               /*
+                * The mitigation is selected from the boot CPU. All other CPUs
+                * _should_ have the same state. If the boot CPU isn't locked
+                * but others are then update_gds_msr() will WARN() of the state
+                * mismatch. If the boot CPU is locked update_gds_msr() will
+                * ensure the other CPUs have the mitigation enabled.
+                */
+               gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
+       }
+
+       update_gds_msr();
+out:
+       pr_info("%s\n", gds_strings[gds_mitigation]);
+}
+
+static int __init gds_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!boot_cpu_has_bug(X86_BUG_GDS))
+               return 0;
+
+       if (!strcmp(str, "off"))
+               gds_mitigation = GDS_MITIGATION_OFF;
+       else if (!strcmp(str, "force"))
+               gds_mitigation = GDS_MITIGATION_FORCE;
+
+       return 0;
+}
+early_param("gather_data_sampling", gds_parse_cmdline);
+
+#undef pr_fmt
 #define pr_fmt(fmt)     "Spectre V1 : " fmt
 
 enum spectre_v1_mitigation {
@@ -885,6 +1042,9 @@ do_cmd_auto:
                setup_force_cpu_cap(X86_FEATURE_RETHUNK);
                setup_force_cpu_cap(X86_FEATURE_UNRET);
 
+               if (IS_ENABLED(CONFIG_RETHUNK))
+                       x86_return_thunk = retbleed_return_thunk;
+
                if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
                    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
                        pr_err(RETBLEED_UNTRAIN_MSG);
@@ -894,6 +1054,7 @@ do_cmd_auto:
 
        case RETBLEED_MITIGATION_IBPB:
                setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+               setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
                mitigate_smt = true;
                break;
 
@@ -1150,19 +1311,21 @@ spectre_v2_user_select_mitigation(void)
        }
 
        /*
-        * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
+        * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP
         * is not required.
         *
-        * Enhanced IBRS also protects against cross-thread branch target
+        * Intel's Enhanced IBRS also protects against cross-thread branch target
         * injection in user-mode as the IBRS bit remains always set which
         * implicitly enables cross-thread protections.  However, in legacy IBRS
         * mode, the IBRS bit is set only on kernel entry and cleared on return
-        * to userspace. This disables the implicit cross-thread protection,
-        * so allow for STIBP to be selected in that case.
+        * to userspace.  AMD Automatic IBRS also does not protect userspace.
+        * These modes therefore disable the implicit cross-thread protection,
+        * so allow for STIBP to be selected in those cases.
         */
        if (!boot_cpu_has(X86_FEATURE_STIBP) ||
            !smt_possible ||
-           spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+           (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+            !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
                return;
 
        /*
@@ -2186,6 +2349,170 @@ static int __init l1tf_cmdline(char *str)
 early_param("l1tf", l1tf_cmdline);
 
 #undef pr_fmt
+#define pr_fmt(fmt)    "Speculative Return Stack Overflow: " fmt
+
+enum srso_mitigation {
+       SRSO_MITIGATION_NONE,
+       SRSO_MITIGATION_MICROCODE,
+       SRSO_MITIGATION_SAFE_RET,
+       SRSO_MITIGATION_IBPB,
+       SRSO_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+enum srso_mitigation_cmd {
+       SRSO_CMD_OFF,
+       SRSO_CMD_MICROCODE,
+       SRSO_CMD_SAFE_RET,
+       SRSO_CMD_IBPB,
+       SRSO_CMD_IBPB_ON_VMEXIT,
+};
+
+static const char * const srso_strings[] = {
+       [SRSO_MITIGATION_NONE]           = "Vulnerable",
+       [SRSO_MITIGATION_MICROCODE]      = "Mitigation: microcode",
+       [SRSO_MITIGATION_SAFE_RET]       = "Mitigation: safe RET",
+       [SRSO_MITIGATION_IBPB]           = "Mitigation: IBPB",
+       [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
+};
+
+static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
+static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET;
+
+static int __init srso_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!strcmp(str, "off"))
+               srso_cmd = SRSO_CMD_OFF;
+       else if (!strcmp(str, "microcode"))
+               srso_cmd = SRSO_CMD_MICROCODE;
+       else if (!strcmp(str, "safe-ret"))
+               srso_cmd = SRSO_CMD_SAFE_RET;
+       else if (!strcmp(str, "ibpb"))
+               srso_cmd = SRSO_CMD_IBPB;
+       else if (!strcmp(str, "ibpb-vmexit"))
+               srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT;
+       else
+               pr_err("Ignoring unknown SRSO option (%s).", str);
+
+       return 0;
+}
+early_param("spec_rstack_overflow", srso_parse_cmdline);
+
+#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options."
+
+static void __init srso_select_mitigation(void)
+{
+       bool has_microcode;
+
+       if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
+               goto pred_cmd;
+
+       /*
+        * The first check is for the kernel running as a guest in order
+        * for guests to verify whether IBPB is a viable mitigation.
+        */
+       has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode();
+       if (!has_microcode) {
+               pr_warn("IBPB-extending microcode not applied!\n");
+               pr_warn(SRSO_NOTICE);
+       } else {
+               /*
+                * Enable the synthetic (even if in a real CPUID leaf)
+                * flags for guests.
+                */
+               setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
+
+               /*
+                * Zen1/2 with SMT off aren't vulnerable after the right
+                * IBPB microcode has been applied.
+                */
+               if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
+                       setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+                       return;
+               }
+       }
+
+       if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+               if (has_microcode) {
+                       pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n");
+                       srso_mitigation = SRSO_MITIGATION_IBPB;
+                       goto pred_cmd;
+               }
+       }
+
+       switch (srso_cmd) {
+       case SRSO_CMD_OFF:
+               return;
+
+       case SRSO_CMD_MICROCODE:
+               if (has_microcode) {
+                       srso_mitigation = SRSO_MITIGATION_MICROCODE;
+                       pr_warn(SRSO_NOTICE);
+               }
+               break;
+
+       case SRSO_CMD_SAFE_RET:
+               if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+                       /*
+                        * Enable the return thunk for generated code
+                        * like ftrace, static_call, etc.
+                        */
+                       setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+                       setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+                       if (boot_cpu_data.x86 == 0x19) {
+                               setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+                               x86_return_thunk = srso_alias_return_thunk;
+                       } else {
+                               setup_force_cpu_cap(X86_FEATURE_SRSO);
+                               x86_return_thunk = srso_return_thunk;
+                       }
+                       srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       case SRSO_CMD_IBPB:
+               if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+                       if (has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+                               srso_mitigation = SRSO_MITIGATION_IBPB;
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       case SRSO_CMD_IBPB_ON_VMEXIT:
+               if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+                       if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+                               setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+                               srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+                       }
+               } else {
+                       pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+                       goto pred_cmd;
+               }
+               break;
+
+       default:
+               break;
+       }
+
+       pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode"));
+
+pred_cmd:
+       if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) &&
+            boot_cpu_has(X86_FEATURE_SBPB))
+               x86_pred_cmd = PRED_CMD_SBPB;
+}
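One detail worth calling out in the safe-RET branch: the return thunk differs per family, since Zen3/4 (family 0x19) needs the alias-safe variant. A trivial, purely illustrative sketch of that choice:

    #include <stdio.h>

    /* Mirrors the family check in srso_select_mitigation() above. */
    static const char *srso_pick_return_thunk(unsigned int family)
    {
            return (family == 0x19) ? "srso_alias_return_thunk"
                                    : "srso_return_thunk";
    }

    int main(void)
    {
            printf("fam 0x17 -> %s\n", srso_pick_return_thunk(0x17));
            printf("fam 0x19 -> %s\n", srso_pick_return_thunk(0x19));
            return 0;
    }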
+
+#undef pr_fmt
 #define pr_fmt(fmt) fmt
 
 #ifdef CONFIG_SYSFS
@@ -2294,7 +2621,8 @@ static ssize_t mmio_stale_data_show_state(char *buf)
 
 static char *stibp_state(void)
 {
-       if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+       if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+           !boot_cpu_has(X86_FEATURE_AUTOIBRS))
                return "";
 
        switch (spectre_v2_user_stibp) {
@@ -2382,6 +2710,21 @@ static ssize_t retbleed_show_state(char *buf)
        return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
 }
 
+static ssize_t srso_show_state(char *buf)
+{
+       if (boot_cpu_has(X86_FEATURE_SRSO_NO))
+               return sysfs_emit(buf, "Mitigation: SMT disabled\n");
+
+       return sysfs_emit(buf, "%s%s\n",
+                         srso_strings[srso_mitigation],
+                         (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
+}
+
+static ssize_t gds_show_state(char *buf)
+{
+       return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
+}
+
 static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
                               char *buf, unsigned int bug)
 {
@@ -2431,6 +2774,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
        case X86_BUG_RETBLEED:
                return retbleed_show_state(buf);
 
+       case X86_BUG_SRSO:
+               return srso_show_state(buf);
+
+       case X86_BUG_GDS:
+               return gds_show_state(buf);
+
        default:
                break;
        }
@@ -2495,4 +2844,14 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha
 {
        return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
 }
+
+ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
+}
+
+ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
+}
 #endif
index 52683fd..41b573f 100644 (file)
@@ -59,7 +59,6 @@
 #include <asm/cacheinfo.h>
 #include <asm/memtype.h>
 #include <asm/microcode.h>
-#include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/cpu_device_id.h>
 #include <asm/uv/uv.h>
@@ -1250,6 +1249,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
 #define RETBLEED       BIT(3)
 /* CPU is affected by SMT (cross-thread) return predictions */
 #define SMT_RSB                BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO           BIT(5)
+/* CPU is affected by GDS */
+#define GDS            BIT(6)
 
 static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
        VULNBL_INTEL_STEPPINGS(IVYBRIDGE,       X86_STEPPING_ANY,               SRBDS),
@@ -1262,27 +1265,30 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
        VULNBL_INTEL_STEPPINGS(BROADWELL_X,     X86_STEPPING_ANY,               MMIO),
        VULNBL_INTEL_STEPPINGS(BROADWELL,       X86_STEPPING_ANY,               SRBDS),
        VULNBL_INTEL_STEPPINGS(SKYLAKE_L,       X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(SKYLAKE_X,       X86_STEPPING_ANY,               MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(SKYLAKE,         X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE_L,      X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(KABYLAKE,        X86_STEPPING_ANY,               SRBDS | MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(CANNONLAKE_L,    X86_STEPPING_ANY,               RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO),
-       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO),
-       VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_L,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_D,       X86_STEPPING_ANY,               MMIO | GDS),
+       VULNBL_INTEL_STEPPINGS(ICELAKE_X,       X86_STEPPING_ANY,               MMIO | GDS),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPINGS(0x0, 0x0),        MMIO | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(COMETLAKE_L,     X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED | GDS),
+       VULNBL_INTEL_STEPPINGS(TIGERLAKE_L,     X86_STEPPING_ANY,               GDS),
+       VULNBL_INTEL_STEPPINGS(TIGERLAKE,       X86_STEPPING_ANY,               GDS),
        VULNBL_INTEL_STEPPINGS(LAKEFIELD,       X86_STEPPING_ANY,               MMIO | MMIO_SBDS | RETBLEED),
-       VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED),
+       VULNBL_INTEL_STEPPINGS(ROCKETLAKE,      X86_STEPPING_ANY,               MMIO | RETBLEED | GDS),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT,    X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D,  X86_STEPPING_ANY,               MMIO),
        VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L,  X86_STEPPING_ANY,               MMIO | MMIO_SBDS),
 
        VULNBL_AMD(0x15, RETBLEED),
        VULNBL_AMD(0x16, RETBLEED),
-       VULNBL_AMD(0x17, RETBLEED | SMT_RSB),
+       VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
        VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
+       VULNBL_AMD(0x19, SRSO),
        {}
 };
 
@@ -1406,6 +1412,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
        if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
                setup_force_cpu_bug(X86_BUG_SMT_RSB);
 
+       if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
+               if (cpu_matches(cpu_vuln_blacklist, SRSO))
+                       setup_force_cpu_bug(X86_BUG_SRSO);
+       }
+
+       /*
+        * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+        * an affected processor, the VMM may have disabled the use of GATHER by
+        * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+        * which means that AVX will be disabled.
+        */
+       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+           boot_cpu_has(X86_FEATURE_AVX))
+               setup_force_cpu_bug(X86_BUG_GDS);
+
        if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
                return;
 
@@ -1962,6 +1983,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
        validate_apic_and_package_id(c);
        x86_spec_ctrl_setup_ap();
        update_srbds_msr();
+       if (boot_cpu_has_bug(X86_BUG_GDS))
+               update_gds_msr();
 
        tsx_ap_init();
 }
@@ -2276,8 +2299,7 @@ void store_cpu_caps(struct cpuinfo_x86 *curr_info)
  * @prev_info: CPU capabilities stored before an update.
  *
  * The microcode loader calls this upon late microcode load to recheck features,
- * only when microcode has been updated. Caller holds microcode_mutex and CPU
- * hotplug lock.
+ * only when microcode has been updated. Caller holds the CPU hotplug lock.
  *
  * Return: None
  */
@@ -2287,6 +2309,8 @@ void microcode_check(struct cpuinfo_x86 *prev_info)
 
        perf_check_microcode();
 
+       amd_check_microcode();
+
        store_cpu_caps(&curr_info);
 
        if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
@@ -2317,7 +2341,7 @@ void __init arch_cpu_finalize_init(void)
         * identify_boot_cpu() initialized SMT support information, let the
         * core code know.
         */
-       cpu_smt_check_topology();
+       cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings);
 
        if (!IS_ENABLED(CONFIG_SMP)) {
                pr_info("CPU: ");
index 1c44630..1dcd7d4 100644 (file)
@@ -83,6 +83,7 @@ void cpu_select_mitigations(void);
 
 extern void x86_spec_ctrl_setup_ap(void);
 extern void update_srbds_msr(void);
+extern void update_gds_msr(void);
 
 extern enum spectre_v2_mitigation spectre_v2_enabled;
 
index 1c46395..be40456 100644 (file)
@@ -20,7 +20,7 @@
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 #include <asm/intel-family.h>
-#include <asm/microcode_intel.h>
+#include <asm/microcode.h>
 #include <asm/hwcap2.h>
 #include <asm/elf.h>
 #include <asm/cpu_device_id.h>
@@ -184,180 +184,6 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
        return false;
 }
 
-int intel_cpu_collect_info(struct ucode_cpu_info *uci)
-{
-       unsigned int val[2];
-       unsigned int family, model;
-       struct cpu_signature csig = { 0 };
-       unsigned int eax, ebx, ecx, edx;
-
-       memset(uci, 0, sizeof(*uci));
-
-       eax = 0x00000001;
-       ecx = 0;
-       native_cpuid(&eax, &ebx, &ecx, &edx);
-       csig.sig = eax;
-
-       family = x86_family(eax);
-       model  = x86_model(eax);
-
-       if (model >= 5 || family > 6) {
-               /* get processor flags from MSR 0x17 */
-               native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
-               csig.pf = 1 << ((val[1] >> 18) & 7);
-       }
-
-       csig.rev = intel_get_microcode_revision();
-
-       uci->cpu_sig = csig;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
-
-/*
- * Returns 1 if update has been found, 0 otherwise.
- */
-int intel_find_matching_signature(void *mc, unsigned int csig, int cpf)
-{
-       struct microcode_header_intel *mc_hdr = mc;
-       struct extended_sigtable *ext_hdr;
-       struct extended_signature *ext_sig;
-       int i;
-
-       if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
-               return 1;
-
-       /* Look for ext. headers: */
-       if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE)
-               return 0;
-
-       ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE;
-       ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
-
-       for (i = 0; i < ext_hdr->count; i++) {
-               if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
-                       return 1;
-               ext_sig++;
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(intel_find_matching_signature);
-
-/**
- * intel_microcode_sanity_check() - Sanity check microcode file.
- * @mc: Pointer to the microcode file contents.
- * @print_err: Display failure reason if true, silent if false.
- * @hdr_type: Type of file, i.e. normal microcode file or In Field Scan file.
- *            Validate if the microcode header type matches with the type
- *            specified here.
- *
- * Validate certain header fields and verify if computed checksum matches
- * with the one specified in the header.
- *
- * Return: 0 if the file passes all the checks, -EINVAL if any of the checks
- * fail.
- */
-int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type)
-{
-       unsigned long total_size, data_size, ext_table_size;
-       struct microcode_header_intel *mc_header = mc;
-       struct extended_sigtable *ext_header = NULL;
-       u32 sum, orig_sum, ext_sigcount = 0, i;
-       struct extended_signature *ext_sig;
-
-       total_size = get_totalsize(mc_header);
-       data_size = get_datasize(mc_header);
-
-       if (data_size + MC_HEADER_SIZE > total_size) {
-               if (print_err)
-                       pr_err("Error: bad microcode data file size.\n");
-               return -EINVAL;
-       }
-
-       if (mc_header->ldrver != 1 || mc_header->hdrver != hdr_type) {
-               if (print_err)
-                       pr_err("Error: invalid/unknown microcode update format. Header type %d\n",
-                              mc_header->hdrver);
-               return -EINVAL;
-       }
-
-       ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
-       if (ext_table_size) {
-               u32 ext_table_sum = 0;
-               u32 *ext_tablep;
-
-               if (ext_table_size < EXT_HEADER_SIZE ||
-                   ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
-                       if (print_err)
-                               pr_err("Error: truncated extended signature table.\n");
-                       return -EINVAL;
-               }
-
-               ext_header = mc + MC_HEADER_SIZE + data_size;
-               if (ext_table_size != exttable_size(ext_header)) {
-                       if (print_err)
-                               pr_err("Error: extended signature table size mismatch.\n");
-                       return -EFAULT;
-               }
-
-               ext_sigcount = ext_header->count;
-
-               /*
-                * Check extended table checksum: the sum of all dwords that
-                * comprise a valid table must be 0.
-                */
-               ext_tablep = (u32 *)ext_header;
-
-               i = ext_table_size / sizeof(u32);
-               while (i--)
-                       ext_table_sum += ext_tablep[i];
-
-               if (ext_table_sum) {
-                       if (print_err)
-                               pr_warn("Bad extended signature table checksum, aborting.\n");
-                       return -EINVAL;
-               }
-       }
-
-       /*
-        * Calculate the checksum of update data and header. The checksum of
-        * valid update data and header including the extended signature table
-        * must be 0.
-        */
-       orig_sum = 0;
-       i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
-       while (i--)
-               orig_sum += ((u32 *)mc)[i];
-
-       if (orig_sum) {
-               if (print_err)
-                       pr_err("Bad microcode data checksum, aborting.\n");
-               return -EINVAL;
-       }
-
-       if (!ext_table_size)
-               return 0;
-
-       /*
-        * Check extended signature checksum: 0 => valid.
-        */
-       for (i = 0; i < ext_sigcount; i++) {
-               ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
-                         EXT_SIGNATURE_SIZE * i;
-
-               sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
-                     (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
-               if (sum) {
-                       if (print_err)
-                               pr_err("Bad extended signature checksum, aborting.\n");
-                       return -EINVAL;
-               }
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(intel_microcode_sanity_check);
-
 static void early_init_intel(struct cpuinfo_x86 *c)
 {
        u64 misc_enable;
index 3b84761..e4c3ba9 100644 (file)
@@ -206,7 +206,7 @@ static int intel_epb_offline(unsigned int cpu)
 static const struct x86_cpu_id intel_epb_normal[] = {
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,
                                   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,
                                   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,
                                   ENERGY_PERF_BIAS_NORMAL_POWERSAVE),
index 5e74610..c4ec4ca 100644 (file)
@@ -1261,10 +1261,10 @@ static void __threshold_remove_blocks(struct threshold_bank *b)
        struct threshold_block *pos = NULL;
        struct threshold_block *tmp = NULL;
 
-       kobject_del(b->kobj);
+       kobject_put(b->kobj);
 
        list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
-               kobject_del(&pos->kobj);
+               kobject_put(&pos->kobj);
 }
 
 static void threshold_remove_bank(struct threshold_bank *bank)
index 89e2aab..6f35f72 100644 (file)
@@ -843,6 +843,26 @@ static noinstr bool quirk_skylake_repmov(void)
 }
 
 /*
+ * Some Zen-based Instruction Fetch Units set EIPV=RIPV=0 on poison consumption
+ * errors. This means mce_gather_info() will not save the "ip" and "cs" registers.
+ *
+ * However, the context is still valid, so save the "cs" register for later use.
+ *
+ * The "ip" register is truly unknown, so don't save it or fixup EIPV/RIPV.
+ *
+ * The Instruction Fetch Unit is at MCA bank 1 for all affected systems.
+ */
+static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_regs *regs)
+{
+       if (bank != 1)
+               return;
+       if (!(m->status & MCI_STATUS_POISON))
+               return;
+
+       m->cs = regs->cs;
+}
+
+/*
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
  */
@@ -861,6 +881,9 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo
                if (mce_flags.snb_ifu_quirk)
                        quirk_sandybridge_ifu(i, m, regs);
 
+               if (mce_flags.zen_ifu_quirk)
+                       quirk_zen_ifu(i, m, regs);
+
                m->bank = i;
                if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) {
                        mce_read_aux(m, i);
@@ -1608,6 +1631,13 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
        local_irq_restore(flags);
 }
 
+static void mc_poll_banks_default(void)
+{
+       machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+}
+
+void (*mc_poll_banks)(void) = mc_poll_banks_default;
+
 static void mce_timer_fn(struct timer_list *t)
 {
        struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
@@ -1618,7 +1648,7 @@ static void mce_timer_fn(struct timer_list *t)
        iv = __this_cpu_read(mce_next_interval);
 
        if (mce_available(this_cpu_ptr(&cpu_info))) {
-               machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+               mc_poll_banks();
 
                if (mce_intel_cmci_poll()) {
                        iv = mce_adjust_timer(iv);
@@ -1842,6 +1872,9 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
                if (c->x86 == 0x15 && c->x86_model <= 0xf)
                        mce_flags.overflow_recov = 1;
 
+               if (c->x86 >= 0x17 && c->x86 <= 0x1A)
+                       mce_flags.zen_ifu_quirk = 1;
+
        }
 
        if (c->x86_vendor == X86_VENDOR_INTEL) {
index 95275a5..f532355 100644 (file)
@@ -56,6 +56,13 @@ static DEFINE_PER_CPU(int, cmci_backoff_cnt);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
+/*
+ * On systems that do support CMCI but it's disabled, polling for MCEs can
+ * cause the same event to be reported multiple times because IA32_MCi_STATUS
+ * is shared by the same package.
+ */
+static DEFINE_SPINLOCK(cmci_poll_lock);
+
 #define CMCI_THRESHOLD         1
 #define CMCI_POLL_INTERVAL     (30 * HZ)
 #define CMCI_STORM_INTERVAL    (HZ)
@@ -426,12 +433,22 @@ void cmci_disable_bank(int bank)
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
 }
 
+/* Bank polling function when CMCI is disabled. */
+static void cmci_mc_poll_banks(void)
+{
+       spin_lock(&cmci_poll_lock);
+       machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+       spin_unlock(&cmci_poll_lock);
+}
+
 void intel_init_cmci(void)
 {
        int banks;
 
-       if (!cmci_supported(&banks))
+       if (!cmci_supported(&banks)) {
+               mc_poll_banks = cmci_mc_poll_banks;
                return;
+       }
 
        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
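
These two hunks wire up the new mc_poll_banks() indirection: core.c installs
an unserialized default, and intel_init_cmci() overrides it with the
spinlock-guarded cmci_mc_poll_banks() when cmci_supported() fails, so only
one CPU at a time polls the shared status banks. A minimal user-space sketch
of the same override pattern, with a pthread mutex standing in for the
kernel spinlock (all names illustrative, not kernel APIs):

#include <stdio.h>
#include <pthread.h>

static pthread_mutex_t poll_lock = PTHREAD_MUTEX_INITIALIZER;

/* Default installed at build time, like mc_poll_banks_default() above. */
static void poll_banks_default(void)
{
        puts("polling banks, unserialized");
}

/* Override used when polling must be serialized across CPUs. */
static void poll_banks_serialized(void)
{
        pthread_mutex_lock(&poll_lock);
        puts("polling banks, one caller at a time");
        pthread_mutex_unlock(&poll_lock);
}

/* The hook a timer callback would invoke. */
static void (*poll_banks)(void) = poll_banks_default;

int main(void)
{
        poll_banks();                        /* default path */
        poll_banks = poll_banks_serialized;  /* e.g. CMCI not usable */
        poll_banks();                        /* serialized path */
        return 0;
}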
index d2412ce..bcf1b3c 100644 (file)
@@ -157,6 +157,9 @@ struct mce_vendor_flags {
         */
        smca                    : 1,
 
+       /* Zen IFU quirk */
+       zen_ifu_quirk           : 1,
+
        /* AMD-style error thresholding banks present. */
        amd_threshold           : 1,
 
@@ -172,7 +175,7 @@ struct mce_vendor_flags {
        /* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */
        skx_repmov_quirk        : 1,
 
-       __reserved_0            : 56;
+       __reserved_0            : 55;
 };
 
 extern struct mce_vendor_flags mce_flags;
@@ -274,4 +277,5 @@ static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
        return 0;
 }
 
+extern void (*mc_poll_banks)(void);
 #endif /* __X86_MCE_INTERNAL_H__ */
index 34098d4..193d98b 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 microcode-y                            := core.o
 obj-$(CONFIG_MICROCODE)                        += microcode.o
-microcode-$(CONFIG_MICROCODE_INTEL)    += intel.o
-microcode-$(CONFIG_MICROCODE_AMD)      += amd.o
+microcode-$(CONFIG_CPU_SUP_INTEL)      += intel.o
+microcode-$(CONFIG_CPU_SUP_AMD)                += amd.o
index 87208e4..bbd1dc3 100644 (file)
 #include <linux/kernel.h>
 #include <linux/pci.h>
 
-#include <asm/microcode_amd.h>
 #include <asm/microcode.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/cpu.h>
 #include <asm/msr.h>
 
+#include "internal.h"
+
+#define UCODE_MAGIC                    0x00414d44
+#define UCODE_EQUIV_CPU_TABLE_TYPE     0x00000000
+#define UCODE_UCODE_TYPE               0x00000001
+
+#define SECTION_HDR_SIZE               8
+#define CONTAINER_HDR_SZ               12
+
+struct equiv_cpu_entry {
+       u32     installed_cpu;
+       u32     fixed_errata_mask;
+       u32     fixed_errata_compare;
+       u16     equiv_cpu;
+       u16     res;
+} __packed;
+
+struct microcode_header_amd {
+       u32     data_code;
+       u32     patch_id;
+       u16     mc_patch_data_id;
+       u8      mc_patch_data_len;
+       u8      init_flag;
+       u32     mc_patch_data_checksum;
+       u32     nb_dev_id;
+       u32     sb_dev_id;
+       u16     processor_rev_id;
+       u8      nb_rev_id;
+       u8      sb_rev_id;
+       u8      bios_api_rev;
+       u8      reserved1[3];
+       u32     match_reg[8];
+} __packed;
+
+struct microcode_amd {
+       struct microcode_header_amd     hdr;
+       unsigned int                    mpb[];
+};
+
+#define PATCH_MAX_SIZE (3 * PAGE_SIZE)
+
 static struct equiv_cpu_table {
        unsigned int num_entries;
        struct equiv_cpu_entry *entry;
@@ -56,9 +96,6 @@ struct cont_desc {
 
 static u32 ucode_new_rev;
 
-/* One blob per node. */
-static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE];
-
 /*
  * Microcode patch container file is prepended to the initrd in cpio
  * format. See Documentation/arch/x86/microcode.rst
@@ -415,20 +452,17 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
  *
  * Returns true if container found (sets @desc), false otherwise.
  */
-static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch)
+static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size)
 {
        struct cont_desc desc = { 0 };
-       u8 (*patch)[PATCH_MAX_SIZE];
        struct microcode_amd *mc;
        u32 rev, dummy, *new_rev;
        bool ret = false;
 
 #ifdef CONFIG_X86_32
        new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-       patch   = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch);
 #else
        new_rev = &ucode_new_rev;
-       patch   = &amd_ucode_patch[0];
 #endif
 
        desc.cpuid_1_eax = cpuid_1_eax;
@@ -452,9 +486,6 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size, boo
        if (!__apply_microcode_amd(mc)) {
                *new_rev = mc->hdr.patch_id;
                ret      = true;
-
-               if (save_patch)
-                       memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE));
        }
 
        return ret;
@@ -507,7 +538,7 @@ static void find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data
        *ret = cp;
 }
 
-void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
+static void apply_ucode_from_containers(unsigned int cpuid_1_eax)
 {
        struct cpio_data cp = { };
 
@@ -515,42 +546,12 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
        if (!(cp.data && cp.size))
                return;
 
-       early_apply_microcode(cpuid_1_eax, cp.data, cp.size, true);
+       early_apply_microcode(cpuid_1_eax, cp.data, cp.size);
 }
 
-void load_ucode_amd_ap(unsigned int cpuid_1_eax)
+void load_ucode_amd_early(unsigned int cpuid_1_eax)
 {
-       struct microcode_amd *mc;
-       struct cpio_data cp;
-       u32 *new_rev, rev, dummy;
-
-       if (IS_ENABLED(CONFIG_X86_32)) {
-               mc      = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
-               new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
-       } else {
-               mc      = (struct microcode_amd *)amd_ucode_patch;
-               new_rev = &ucode_new_rev;
-       }
-
-       native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
-       /*
-        * Check whether a new patch has been saved already. Also, allow application of
-        * the same revision in order to pick up SMT-thread-specific configuration even
-        * if the sibling SMT thread already has an up-to-date revision.
-        */
-       if (*new_rev && rev <= mc->hdr.patch_id) {
-               if (!__apply_microcode_amd(mc)) {
-                       *new_rev = mc->hdr.patch_id;
-                       return;
-               }
-       }
-
-       find_blobs_in_containers(cpuid_1_eax, &cp);
-       if (!(cp.data && cp.size))
-               return;
-
-       early_apply_microcode(cpuid_1_eax, cp.data, cp.size, false);
+       return apply_ucode_from_containers(cpuid_1_eax);
 }
 
 static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size);
@@ -578,23 +579,6 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
        return 0;
 }
 
-void reload_ucode_amd(unsigned int cpu)
-{
-       u32 rev, dummy __always_unused;
-       struct microcode_amd *mc;
-
-       mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)];
-
-       rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
-       if (rev < mc->hdr.patch_id) {
-               if (!__apply_microcode_amd(mc)) {
-                       ucode_new_rev = mc->hdr.patch_id;
-                       pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
-               }
-       }
-}
-
 /*
  * a small, trivial cache of per-family ucode patches
  */
@@ -655,6 +639,28 @@ static struct ucode_patch *find_patch(unsigned int cpu)
        return cache_find_patch(equiv_id);
 }
 
+void reload_ucode_amd(unsigned int cpu)
+{
+       u32 rev, dummy __always_unused;
+       struct microcode_amd *mc;
+       struct ucode_patch *p;
+
+       p = find_patch(cpu);
+       if (!p)
+               return;
+
+       mc = p->data;
+
+       rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
+
+       if (rev < mc->hdr.patch_id) {
+               if (!__apply_microcode_amd(mc)) {
+                       ucode_new_rev = mc->hdr.patch_id;
+                       pr_info("reload patch_level=0x%08x\n", ucode_new_rev);
+               }
+       }
+}
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -875,9 +881,6 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz
                        continue;
 
                ret = UCODE_NEW;
-
-               memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE);
-               memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, p->size, PATCH_MAX_SIZE));
        }
 
        return ret;
index 3afcf3d..6cc7a2c 100644 (file)
 #include <linux/fs.h>
 #include <linux/mm.h>
 
-#include <asm/microcode_intel.h>
 #include <asm/cpu_device_id.h>
-#include <asm/microcode_amd.h>
 #include <asm/perf_event.h>
-#include <asm/microcode.h>
 #include <asm/processor.h>
 #include <asm/cmdline.h>
 #include <asm/setup.h>
 
+#include "internal.h"
+
 #define DRIVER_VERSION "2.2"
 
 static struct microcode_ops    *microcode_ops;
@@ -54,15 +53,12 @@ LIST_HEAD(microcode_cache);
  *
 * All non-cpu-hotplug-callback call sites use:
  *
- * - microcode_mutex to synchronize with each other;
  * - cpus_read_lock/unlock() to synchronize with
  *   the cpu-hotplug-callback call sites.
  *
  * We guarantee that only a single cpu is being
  * updated at any particular moment of time.
  */
-static DEFINE_MUTEX(microcode_mutex);
-
 struct ucode_cpu_info          ucode_cpu_info[NR_CPUS];
 
 struct cpu_info_ctx {
@@ -172,7 +168,7 @@ void __init load_ucode_bsp(void)
        if (intel)
                load_ucode_intel_bsp();
        else
-               load_ucode_amd_bsp(cpuid_1_eax);
+               load_ucode_amd_early(cpuid_1_eax);
 }
 
 static bool check_loader_disabled_ap(void)
@@ -200,7 +196,7 @@ void load_ucode_ap(void)
                break;
        case X86_VENDOR_AMD:
                if (x86_family(cpuid_1_eax) >= 0x10)
-                       load_ucode_amd_ap(cpuid_1_eax);
+                       load_ucode_amd_early(cpuid_1_eax);
                break;
        default:
                break;
@@ -298,7 +294,7 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa)
 #endif
 }
 
-void reload_early_microcode(unsigned int cpu)
+static void reload_early_microcode(unsigned int cpu)
 {
        int vendor, family;
 
@@ -488,10 +484,7 @@ static ssize_t reload_store(struct device *dev,
        if (tmp_ret != UCODE_NEW)
                goto put;
 
-       mutex_lock(&microcode_mutex);
        ret = microcode_reload_late();
-       mutex_unlock(&microcode_mutex);
-
 put:
        cpus_read_unlock();
 
index 467cf37..94dd6af 100644 (file)
  * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com>
 *                   H Peter Anvin <hpa@zytor.com>
  */
-
-/*
- * This needs to be before all headers so that pr_debug in printk.h doesn't turn
- * printk calls into no_printk().
- *
- *#define DEBUG
- */
 #define pr_fmt(fmt) "microcode: " fmt
-
 #include <linux/earlycpio.h>
 #include <linux/firmware.h>
 #include <linux/uaccess.h>
 #include <linux/uio.h>
 #include <linux/mm.h>
 
-#include <asm/microcode_intel.h>
 #include <asm/intel-family.h>
 #include <asm/processor.h>
 #include <asm/tlbflush.h>
 #include <asm/setup.h>
 #include <asm/msr.h>
 
+#include "internal.h"
+
 static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin";
 
 /* Current microcode patch used in early patching on the APs. */
@@ -45,6 +38,208 @@ static struct microcode_intel *intel_ucode_patch;
 /* last level cache size per core */
 static int llc_size_per_core;
 
+/* The microcode format has been extended since Prescott processors. */
+struct extended_signature {
+       unsigned int    sig;
+       unsigned int    pf;
+       unsigned int    cksum;
+};
+
+struct extended_sigtable {
+       unsigned int                    count;
+       unsigned int                    cksum;
+       unsigned int                    reserved[3];
+       struct extended_signature       sigs[];
+};
+
+#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
+#define EXT_HEADER_SIZE                (sizeof(struct extended_sigtable))
+#define EXT_SIGNATURE_SIZE     (sizeof(struct extended_signature))
+
+static inline unsigned int get_totalsize(struct microcode_header_intel *hdr)
+{
+       return hdr->datasize ? hdr->totalsize : DEFAULT_UCODE_TOTALSIZE;
+}
+
+static inline unsigned int exttable_size(struct extended_sigtable *et)
+{
+       return et->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE;
+}
+
+int intel_cpu_collect_info(struct ucode_cpu_info *uci)
+{
+       unsigned int val[2];
+       unsigned int family, model;
+       struct cpu_signature csig = { 0 };
+       unsigned int eax, ebx, ecx, edx;
+
+       memset(uci, 0, sizeof(*uci));
+
+       eax = 0x00000001;
+       ecx = 0;
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+       csig.sig = eax;
+
+       family = x86_family(eax);
+       model  = x86_model(eax);
+
+       if (model >= 5 || family > 6) {
+               /* get processor flags from MSR 0x17 */
+               native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+               csig.pf = 1 << ((val[1] >> 18) & 7);
+       }
+
+       csig.rev = intel_get_microcode_revision();
+
+       uci->cpu_sig = csig;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(intel_cpu_collect_info);
+
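
A note on the platform-flags computation above: val[1] is the high half of
MSR_IA32_PLATFORM_ID, so (val[1] >> 18) & 7 selects MSR bits 52:50 and pf
becomes a one-hot mask for that 3-bit platform id. A small self-contained
check of the bit arithmetic (illustrative, not kernel code):

#include <assert.h>
#include <stdint.h>

/* Recompute csig.pf from a raw 64-bit IA32_PLATFORM_ID value. */
static uint32_t pf_from_platform_id(uint64_t msr)
{
        uint32_t hi = (uint32_t)(msr >> 32);    /* val[1] above */

        return 1u << ((hi >> 18) & 7);          /* MSR bits 52:50 */
}

int main(void)
{
        /* Platform id 3 (MSR bits 51:50 set) yields the mask 0x8. */
        assert(pf_from_platform_id(3ULL << 50) == 0x8);
        assert(pf_from_platform_id(0) == 0x1);
        return 0;
}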
+/*
+ * Returns 1 if an update has been found, 0 otherwise.
+ */
+int intel_find_matching_signature(void *mc, unsigned int csig, int cpf)
+{
+       struct microcode_header_intel *mc_hdr = mc;
+       struct extended_sigtable *ext_hdr;
+       struct extended_signature *ext_sig;
+       int i;
+
+       if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf))
+               return 1;
+
+       /* Look for ext. headers: */
+       if (get_totalsize(mc_hdr) <= intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE)
+               return 0;
+
+       ext_hdr = mc + intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE;
+       ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE;
+
+       for (i = 0; i < ext_hdr->count; i++) {
+               if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf))
+                       return 1;
+               ext_sig++;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(intel_find_matching_signature);
+
+/**
+ * intel_microcode_sanity_check() - Sanity check microcode file.
+ * @mc: Pointer to the microcode file contents.
+ * @print_err: Display failure reason if true, silent if false.
+ * @hdr_type: Type of file, i.e. normal microcode file or In Field Scan file.
+ *            Validate that the microcode header type matches the type
+ *            specified here.
+ *
+ * Validate certain header fields and verify that the computed checksum
+ * matches the one specified in the header.
+ *
+ * Return: 0 if the file passes all the checks, -EINVAL if any of the checks
+ * fail.
+ */
+int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type)
+{
+       unsigned long total_size, data_size, ext_table_size;
+       struct microcode_header_intel *mc_header = mc;
+       struct extended_sigtable *ext_header = NULL;
+       u32 sum, orig_sum, ext_sigcount = 0, i;
+       struct extended_signature *ext_sig;
+
+       total_size = get_totalsize(mc_header);
+       data_size = intel_microcode_get_datasize(mc_header);
+
+       if (data_size + MC_HEADER_SIZE > total_size) {
+               if (print_err)
+                       pr_err("Error: bad microcode data file size.\n");
+               return -EINVAL;
+       }
+
+       if (mc_header->ldrver != 1 || mc_header->hdrver != hdr_type) {
+               if (print_err)
+                       pr_err("Error: invalid/unknown microcode update format. Header type %d\n",
+                              mc_header->hdrver);
+               return -EINVAL;
+       }
+
+       ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
+       if (ext_table_size) {
+               u32 ext_table_sum = 0;
+               u32 *ext_tablep;
+
+               if (ext_table_size < EXT_HEADER_SIZE ||
+                   ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
+                       if (print_err)
+                               pr_err("Error: truncated extended signature table.\n");
+                       return -EINVAL;
+               }
+
+               ext_header = mc + MC_HEADER_SIZE + data_size;
+               if (ext_table_size != exttable_size(ext_header)) {
+                       if (print_err)
+                               pr_err("Error: extended signature table size mismatch.\n");
+                       return -EFAULT;
+               }
+
+               ext_sigcount = ext_header->count;
+
+               /*
+                * Check extended table checksum: the sum of all dwords that
+                * comprise a valid table must be 0.
+                */
+               ext_tablep = (u32 *)ext_header;
+
+               i = ext_table_size / sizeof(u32);
+               while (i--)
+                       ext_table_sum += ext_tablep[i];
+
+               if (ext_table_sum) {
+                       if (print_err)
+                               pr_warn("Bad extended signature table checksum, aborting.\n");
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * Calculate the checksum of update data and header. The checksum of
+        * valid update data and header including the extended signature table
+        * must be 0.
+        */
+       orig_sum = 0;
+       i = (MC_HEADER_SIZE + data_size) / sizeof(u32);
+       while (i--)
+               orig_sum += ((u32 *)mc)[i];
+
+       if (orig_sum) {
+               if (print_err)
+                       pr_err("Bad microcode data checksum, aborting.\n");
+               return -EINVAL;
+       }
+
+       if (!ext_table_size)
+               return 0;
+
+       /*
+        * Check extended signature checksum: 0 => valid.
+        */
+       for (i = 0; i < ext_sigcount; i++) {
+               ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
+                         EXT_SIGNATURE_SIZE * i;
+
+               sum = (mc_header->sig + mc_header->pf + mc_header->cksum) -
+                     (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
+               if (sum) {
+                       if (print_err)
+                               pr_err("Bad extended signature checksum, aborting.\n");
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(intel_microcode_sanity_check);
+
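
The dword-sum checks above rely on a common checksum convention: the update
is emitted so that the 32-bit sum of all of its dwords wraps to zero, which
is why re-summing the blob (and the extended table) validates it. A toy
demonstration of the invariant (illustrative only):

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint32_t blob[4] = { 0x11111111, 0x22222222, 0x33333333, 0 };
        uint32_t sum = 0;
        int i;

        /* The producer stores the negated partial sum in a checksum dword... */
        blob[3] = -(blob[0] + blob[1] + blob[2]);

        /* ...so the consumer's total wraps to exactly zero. */
        for (i = 0; i < 4; i++)
                sum += blob[i];
        assert(sum == 0);
        return 0;
}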
 /*
  * Returns 1 if an update has been found, 0 otherwise.
  */
@@ -202,86 +397,6 @@ next:
        return patch;
 }
 
-static void show_saved_mc(void)
-{
-#ifdef DEBUG
-       int i = 0, j;
-       unsigned int sig, pf, rev, total_size, data_size, date;
-       struct ucode_cpu_info uci;
-       struct ucode_patch *p;
-
-       if (list_empty(&microcode_cache)) {
-               pr_debug("no microcode data saved.\n");
-               return;
-       }
-
-       intel_cpu_collect_info(&uci);
-
-       sig     = uci.cpu_sig.sig;
-       pf      = uci.cpu_sig.pf;
-       rev     = uci.cpu_sig.rev;
-       pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev);
-
-       list_for_each_entry(p, &microcode_cache, plist) {
-               struct microcode_header_intel *mc_saved_header;
-               struct extended_sigtable *ext_header;
-               struct extended_signature *ext_sig;
-               int ext_sigcount;
-
-               mc_saved_header = (struct microcode_header_intel *)p->data;
-
-               sig     = mc_saved_header->sig;
-               pf      = mc_saved_header->pf;
-               rev     = mc_saved_header->rev;
-               date    = mc_saved_header->date;
-
-               total_size      = get_totalsize(mc_saved_header);
-               data_size       = get_datasize(mc_saved_header);
-
-               pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n",
-                        i++, sig, pf, rev, total_size,
-                        date & 0xffff,
-                        date >> 24,
-                        (date >> 16) & 0xff);
-
-               /* Look for ext. headers: */
-               if (total_size <= data_size + MC_HEADER_SIZE)
-                       continue;
-
-               ext_header = (void *)mc_saved_header + data_size + MC_HEADER_SIZE;
-               ext_sigcount = ext_header->count;
-               ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
-
-               for (j = 0; j < ext_sigcount; j++) {
-                       sig = ext_sig->sig;
-                       pf = ext_sig->pf;
-
-                       pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n",
-                                j, sig, pf);
-
-                       ext_sig++;
-               }
-       }
-#endif
-}
-
-/*
- * Save this microcode patch. It will be loaded early when a CPU is
- * hot-added or resumes.
- */
-static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)
-{
-       /* Synchronization during CPU hotplug. */
-       static DEFINE_MUTEX(x86_cpu_microcode_mutex);
-
-       mutex_lock(&x86_cpu_microcode_mutex);
-
-       save_microcode_patch(uci, mc, size);
-       show_saved_mc();
-
-       mutex_unlock(&x86_cpu_microcode_mutex);
-}
-
 static bool load_builtin_intel_microcode(struct cpio_data *cp)
 {
        unsigned int eax = 1, ebx, ecx = 0, edx;
@@ -428,9 +543,6 @@ int __init save_microcode_in_initrd_intel(void)
        intel_cpu_collect_info(&uci);
 
        scan_microcode(cp.data, cp.size, &uci, true);
-
-       show_saved_mc();
-
        return 0;
 }
 
@@ -701,12 +813,8 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
        vfree(uci->mc);
        uci->mc = (struct microcode_intel *)new_mc;
 
-       /*
-        * If early loading microcode is supported, save this mc into
-        * permanent memory. So it will be loaded early when a CPU is hot added
-        * or resumes.
-        */
-       save_mc_for_early(uci, new_mc, new_mc_size);
+       /* Save for CPU hotplug */
+       save_microcode_patch(uci, new_mc, new_mc_size);
 
        pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
                 cpu, new_rev, uci->cpu_sig.rev);
diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h
new file mode 100644 (file)
index 0000000..bf883aa
--- /dev/null
@@ -0,0 +1,131 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_MICROCODE_INTERNAL_H
+#define _X86_MICROCODE_INTERNAL_H
+
+#include <linux/earlycpio.h>
+#include <linux/initrd.h>
+
+#include <asm/cpu.h>
+#include <asm/microcode.h>
+
+struct ucode_patch {
+       struct list_head plist;
+       void *data;             /* Intel uses only this one */
+       unsigned int size;
+       u32 patch_id;
+       u16 equiv_cpu;
+};
+
+extern struct list_head microcode_cache;
+
+struct device;
+
+enum ucode_state {
+       UCODE_OK        = 0,
+       UCODE_NEW,
+       UCODE_UPDATED,
+       UCODE_NFOUND,
+       UCODE_ERROR,
+};
+
+struct microcode_ops {
+       enum ucode_state (*request_microcode_fw)(int cpu, struct device *dev);
+
+       void (*microcode_fini_cpu)(int cpu);
+
+       /*
+        * The generic 'microcode_core' part guarantees that
+        * the callbacks below run on the target CPU when they
+        * are called.
+        * See also the "Synchronization" section in microcode_core.c.
+        */
+       enum ucode_state (*apply_microcode)(int cpu);
+       int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
+};
+
+extern struct ucode_cpu_info ucode_cpu_info[];
+struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa);
+
+#define MAX_UCODE_COUNT 128
+
+#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))
+#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u')
+#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I')
+#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l')
+#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h')
+#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i')
+#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D')
+
+#define CPUID_IS(a, b, c, ebx, ecx, edx)       \
+               (!(((ebx) ^ (a)) | ((edx) ^ (b)) | ((ecx) ^ (c))))
+
+/*
+ * In the early microcode-loading phase on the BSP, boot_cpu_data is not set
+ * up yet, so x86_cpuid_vendor() is used to get the vendor id for the BSP.
+ *
+ * In the 32-bit AP case, accessing boot_cpu_data would need a linear address.
+ * To simplify the code, x86_cpuid_vendor() is used for the APs as well.
+ *
+ * x86_cpuid_vendor() gets the vendor information directly from CPUID.
+ */
+static inline int x86_cpuid_vendor(void)
+{
+       u32 eax = 0x00000000;
+       u32 ebx, ecx = 0, edx;
+
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+
+       if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
+               return X86_VENDOR_INTEL;
+
+       if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx))
+               return X86_VENDOR_AMD;
+
+       return X86_VENDOR_UNKNOWN;
+}
+
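
x86_cpuid_vendor() works because CPUID leaf 0 returns the vendor string
split across EBX, EDX, ECX as four little-endian bytes per register
("Genu", "ineI", "ntel" for Intel), which is exactly the layout QCHAR
packs. A quick self-contained check (illustrative, not kernel code):

#include <assert.h>
#include <stdint.h>

#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24))

int main(void)
{
        /* 'G'=0x47, 'e'=0x65, 'n'=0x6e, 'u'=0x75, little-endian in EBX. */
        assert((uint32_t)QCHAR('G', 'e', 'n', 'u') == 0x756e6547);
        return 0;
}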
+static inline unsigned int x86_cpuid_family(void)
+{
+       u32 eax = 0x00000001;
+       u32 ebx, ecx = 0, edx;
+
+       native_cpuid(&eax, &ebx, &ecx, &edx);
+
+       return x86_family(eax);
+}
+
+extern bool initrd_gone;
+
+#ifdef CONFIG_CPU_SUP_AMD
+void load_ucode_amd_bsp(unsigned int family);
+void load_ucode_amd_ap(unsigned int family);
+void load_ucode_amd_early(unsigned int cpuid_1_eax);
+int save_microcode_in_initrd_amd(unsigned int family);
+void reload_ucode_amd(unsigned int cpu);
+struct microcode_ops *init_amd_microcode(void);
+void exit_amd_microcode(void);
+#else /* CONFIG_CPU_SUP_AMD */
+static inline void load_ucode_amd_bsp(unsigned int family) { }
+static inline void load_ucode_amd_ap(unsigned int family) { }
+static inline void load_ucode_amd_early(unsigned int family) { }
+static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
+static inline void reload_ucode_amd(unsigned int cpu) { }
+static inline struct microcode_ops *init_amd_microcode(void) { return NULL; }
+static inline void exit_amd_microcode(void) { }
+#endif /* !CONFIG_CPU_SUP_AMD */
+
+#ifdef CONFIG_CPU_SUP_INTEL
+void load_ucode_intel_bsp(void);
+void load_ucode_intel_ap(void);
+int save_microcode_in_initrd_intel(void);
+void reload_ucode_intel(void);
+struct microcode_ops *init_intel_microcode(void);
+#else /* CONFIG_CPU_SUP_INTEL */
+static inline void load_ucode_intel_bsp(void) { }
+static inline void load_ucode_intel_ap(void) { }
+static inline int save_microcode_in_initrd_intel(void) { return -EINVAL; }
+static inline void reload_ucode_intel(void) { }
+static inline struct microcode_ops *init_intel_microcode(void) { return NULL; }
+#endif  /* !CONFIG_CPU_SUP_INTEL */
+
+#endif /* _X86_MICROCODE_INTERNAL_H */
index af5cbdd..f6d856b 100644 (file)
@@ -19,8 +19,7 @@
  * FPU state for a task MUST let the rest of the kernel know that the
  * FPU registers are no longer valid for this task.
  *
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
+ * Invalidate a resource you control: CPU if using the CPU for something else
  * (with preemption disabled), FPU for the current task, or a task that
  * is prevented from running by the current task.
  */
index 1015af1..98e507c 100644 (file)
@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void)
        struct fpu *fpu = &current->thread.fpu;
 
        fpregs_lock();
-       fpu__drop(fpu);
+       __fpu_invalidate_fpregs_state(fpu);
        /*
         * This does not change the actual hardware registers. It just
         * resets the memory image and sets TIF_NEED_FPU_LOAD so a
index 0bab497..1afbc48 100644 (file)
@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
                goto out_disable;
        }
 
+       /*
+        * CPU capabilities initialization runs before FPU init. So
+        * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+        * functional, set the feature bit so code that depends on it works.
+        */
+       setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
        print_xstate_offset_size();
        pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
                fpu_kernel_cfg.max_features,
index 01e8f34..12df54f 100644 (file)
@@ -282,7 +282,6 @@ static inline void tramp_free(void *tramp) { }
 
 /* Defined as markers to the end of the ftrace default trampolines */
 extern void ftrace_regs_caller_end(void);
-extern void ftrace_regs_caller_ret(void);
 extern void ftrace_caller_end(void);
 extern void ftrace_caller_op_ptr(void);
 extern void ftrace_regs_caller_op_ptr(void);
index c5b9289..ea69959 100644 (file)
@@ -51,7 +51,9 @@ SYM_CODE_START_NOALIGN(startup_64)
         * for us.  These identity mapped page tables map all of the
         * kernel pages and possibly all of memory.
         *
-        * %rsi holds a physical pointer to real_mode_data.
+        * %RSI holds the physical address of the boot_params structure
+        * provided by the bootloader. Preserve it in %R15 so C function calls
+        * will not clobber it.
         *
         * We come here either directly from a 64bit bootloader, or from
         * arch/x86/boot/compressed/head_64.S.
@@ -62,6 +64,7 @@ SYM_CODE_START_NOALIGN(startup_64)
         * compiled to run at we first fixup the physical addresses in our page
         * tables and then reload them.
         */
+       mov     %rsi, %r15
 
        /* Set up the stack for verify_cpu() */
        leaq    (__end_init_task - PTREGS_SIZE)(%rip), %rsp
@@ -75,9 +78,7 @@ SYM_CODE_START_NOALIGN(startup_64)
        shrq    $32,  %rdx
        wrmsr
 
-       pushq   %rsi
        call    startup_64_setup_env
-       popq    %rsi
 
        /* Now switch to __KERNEL_CS so IRET works reliably */
        pushq   $__KERNEL_CS
@@ -93,12 +94,10 @@ SYM_CODE_START_NOALIGN(startup_64)
         * Activate SEV/SME memory encryption if supported/enabled. This needs to
         * be done now, since this also includes setup of the SEV-SNP CPUID table,
         * which needs to be done before any CPUID instructions are executed in
-        * subsequent code.
+        * subsequent code. Pass the boot_params pointer as the first argument.
         */
-       movq    %rsi, %rdi
-       pushq   %rsi
+       movq    %r15, %rdi
        call    sme_enable
-       popq    %rsi
 #endif
 
        /* Sanitize CPU configuration */
@@ -111,9 +110,8 @@ SYM_CODE_START_NOALIGN(startup_64)
         * programmed into CR3.
         */
        leaq    _text(%rip), %rdi
-       pushq   %rsi
+       movq    %r15, %rsi
        call    __startup_64
-       popq    %rsi
 
        /* Form the CR3 value being sure to include the CR3 modifier */
        addq    $(early_top_pgt - __START_KERNEL_map), %rax
@@ -127,8 +125,6 @@ SYM_CODE_START(secondary_startup_64)
         * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
         * and someone has loaded a mapped page table.
         *
-        * %rsi holds a physical pointer to real_mode_data.
-        *
         * We come here either from startup_64 (using physical addresses)
         * or from trampoline.S (using virtual addresses).
         *
@@ -153,6 +149,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
        UNWIND_HINT_END_OF_STACK
        ANNOTATE_NOENDBR
 
+       /* Clear %R15 which holds the boot_params pointer on the boot CPU */
+       xorq    %r15, %r15
+
        /*
         * Retrieve the modifier (SME encryption mask if SME is active) to be
         * added to the initial pgdir entry that will be programmed into CR3.
@@ -199,13 +198,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
         * hypervisor could lie about the C-bit position to perform a ROP
         * attack on the guest by writing to the unencrypted stack and wait for
         * the next RET instruction.
-        * %rsi carries pointer to realmode data and is callee-clobbered. Save
-        * and restore it.
         */
-       pushq   %rsi
        movq    %rax, %rdi
        call    sev_verify_cbit
-       popq    %rsi
 
        /*
         * Switch to new page-table
@@ -365,9 +360,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
        wrmsr
 
        /* Setup and Load IDT */
-       pushq   %rsi
        call    early_setup_idt
-       popq    %rsi
 
        /* Check if nx is implemented */
        movl    $0x80000001, %eax
@@ -403,9 +396,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
        pushq $0
        popfq
 
-       /* rsi is pointer to real mode structure with interesting info.
-          pass it to C */
-       movq    %rsi, %rdi
+       /* Pass the boot_params pointer as first argument */
+       movq    %r15, %rdi
 
 .Ljump_to_C_code:
        /*
index c8eb1ac..1648aa0 100644 (file)
@@ -421,7 +421,7 @@ static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc)
         * the IO_APIC has been initialized.
         */
        hc->cpu = boot_cpu_data.cpu_index;
-       strncpy(hc->name, "hpet", sizeof(hc->name));
+       strscpy(hc->name, "hpet", sizeof(hc->name));
        hpet_init_clockevent(hc, 50);
 
        hc->evt.tick_resume     = hpet_clkevt_legacy_resume;
index 57b0037..517821b 100644 (file)
@@ -226,7 +226,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
 }
 
 /* Check whether insn is an indirect jump */
-static int __insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
 {
        return ((insn->opcode.bytes[0] == 0xff &&
                (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -260,26 +260,6 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
        return (start <= target && target <= start + len);
 }
 
-static int insn_is_indirect_jump(struct insn *insn)
-{
-       int ret = __insn_is_indirect_jump(insn);
-
-#ifdef CONFIG_RETPOLINE
-       /*
-        * Jump to x86_indirect_thunk_* is treated as an indirect jump.
-        * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
-        * older gcc may use indirect jump. So we add this check instead of
-        * replace indirect-jump check.
-        */
-       if (!ret)
-               ret = insn_jump_into_range(insn,
-                               (unsigned long)__indirect_thunk_start,
-                               (unsigned long)__indirect_thunk_end -
-                               (unsigned long)__indirect_thunk_start);
-#endif
-       return ret;
-}
-
 /* Decode the whole function to ensure no instruction jumps into the target */
 static int can_optimize(unsigned long paddr)
 {
@@ -334,9 +314,21 @@ static int can_optimize(unsigned long paddr)
                /* Recover address */
                insn.kaddr = (void *)addr;
                insn.next_byte = (void *)(addr + insn.length);
-               /* Check any instructions don't jump into target */
-               if (insn_is_indirect_jump(&insn) ||
-                   insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+               /*
+                * Check that no instruction jumps into the target, directly or
+                * indirectly.
+                *
+                * The indirect case is present to handle code with jump
+                * tables. When the kernel uses retpolines, the check should in
+                * theory additionally look for jumps to indirect thunks.
+                * However, a kernel built with retpolines or IBT has jump
+                * tables disabled, so the check can be skipped altogether.
+                */
+               if (!IS_ENABLED(CONFIG_RETPOLINE) &&
+                   !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+                   insn_is_indirect_jump(&insn))
+                       return 0;
+               if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
                                         DISP32_SIZE))
                        return 0;
                addr += insn.length;
index 1cceac5..526d4da 100644 (file)
@@ -966,10 +966,8 @@ static void __init kvm_init_platform(void)
                 * Ensure that _bss_decrypted section is marked as decrypted in the
                 * shared pages list.
                 */
-               nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
-                                       PAGE_SIZE);
                early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
-                                               nr_pages, 0);
+                                               __end_bss_decrypted - __start_bss_decrypted, 0);
 
                /*
                 * If not booted using EFI, enable Live migration support.
index b05f62e..5f71a0c 100644 (file)
@@ -358,7 +358,7 @@ int module_finalize(const Elf_Ehdr *hdr,
        }
        if (ibt_endbr) {
                void *iseg = (void *)ibt_endbr->sh_addr;
-               apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);
+               apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size);
        }
        if (locks) {
                void *lseg = (void *)locks->sh_addr;
index ac10b46..975f98d 100644 (file)
@@ -75,10 +75,16 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
 
 void __init native_pv_lock_init(void)
 {
-       if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
+       if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) &&
+           !boot_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
 }
 
+static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
+{
+       tlb_remove_page(tlb, table);
+}
+
 unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr,
                            unsigned int len)
 {
@@ -295,8 +301,7 @@ struct paravirt_patch_template pv_ops = {
        .mmu.flush_tlb_kernel   = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
        .mmu.flush_tlb_multi    = native_flush_tlb_multi,
-       .mmu.tlb_remove_table   =
-                       (void (*)(struct mmu_gather *, void *))tlb_remove_page,
+       .mmu.tlb_remove_table   = native_tlb_remove_table,
 
        .mmu.exit_mmap          = paravirt_nop,
        .mmu.notify_page_enc_status_changed     = paravirt_nop,
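
The native_tlb_remove_table() wrapper matters because calling a function
through a pointer of a mismatched type is undefined behavior in C and traps
under kCFI; an adapter with the exact expected signature avoids both. A
before/after sketch with simplified types (illustrative, not the kernel's
definitions):

struct mmu_gather;      /* opaque here */

/* Callee taking an extra argument the callback table knows nothing about. */
static void remove_page(struct mmu_gather *tlb, void *page, unsigned int order)
{
        (void)tlb; (void)page; (void)order;
}

/*
 * Wrong: the cast compiles, but the indirect call's signature no longer
 * matches the callee's:
 *
 *      void (*cb)(struct mmu_gather *, void *) =
 *                      (void (*)(struct mmu_gather *, void *))remove_page;
 */

/* Right: an adapter with exactly the signature the table expects. */
static void remove_table(struct mmu_gather *tlb, void *table)
{
        remove_page(tlb, table, 0);
}

void (*tlb_remove_table_cb)(struct mmu_gather *, void *) = remove_table;

int main(void)
{
        tlb_remove_table_cb(0, 0);
        return 0;
}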
index ff9b80a..72015db 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/static_call.h>
 #include <trace/events/power.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/entry-common.h>
 #include <asm/cpu.h>
 #include <asm/apic.h>
 #include <linux/uaccess.h>
@@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
                return do_set_thread_area_64(p, ARCH_SET_FS, tls);
 }
 
+__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
+                                    int (*fn)(void *), void *fn_arg)
+{
+       schedule_tail(prev);
+
+       /* Is this a kernel thread? */
+       if (unlikely(fn)) {
+               fn(fn_arg);
+               /*
+                * A kernel thread is allowed to return here after successfully
+                * calling kernel_execve().  Exit to userspace to complete the
+                * execve() syscall.
+                */
+               regs->ax = 0;
+       }
+
+       syscall_exit_to_user_mode(regs);
+}
+
 int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 {
        unsigned long clone_flags = args->flags;
@@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
        frame = &fork_frame->frame;
 
        frame->bp = encode_frame_pointer(childregs);
-       frame->ret_addr = (unsigned long) ret_from_fork;
+       frame->ret_addr = (unsigned long) ret_from_fork_asm;
        p->thread.sp = (unsigned long) fork_frame;
        p->thread.io_bitmap = NULL;
        p->thread.iopl_warn = 0;
index 1ee7bed..d380c93 100644 (file)
@@ -1575,6 +1575,9 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
        long val, *reg = vc_insn_get_rm(ctxt);
        enum es_result ret;
 
+       if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
+               return ES_VMM_ERROR;
+
        if (!reg)
                return ES_DECODE_FAILED;
 
@@ -1612,6 +1615,9 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
        long *reg = vc_insn_get_rm(ctxt);
 
+       if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP)
+               return ES_VMM_ERROR;
+
        if (!reg)
                return ES_DECODE_FAILED;
 
index e1aa2cd..d40ed3a 100644 (file)
@@ -327,14 +327,6 @@ static void notrace start_secondary(void *unused)
 }
 
 /**
- * topology_smt_supported - Check whether SMT is supported by the CPUs
- */
-bool topology_smt_supported(void)
-{
-       return smp_num_siblings > 1;
-}
-
-/**
  * topology_phys_to_logical_pkg - Map a physical package id to a logical
  * @phys_pkg:  The physical package id to map
  *
@@ -632,14 +624,9 @@ static void __init build_sched_topology(void)
        };
 #endif
 #ifdef CONFIG_SCHED_CLUSTER
-       /*
-        * For now, skip the cluster domain on Hybrid.
-        */
-       if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
-               x86_topology[i++] = (struct sched_domain_topology_level){
-                       cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
-               };
-       }
+       x86_topology[i++] = (struct sched_domain_topology_level){
+               cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
+       };
 #endif
 #ifdef CONFIG_SCHED_MC
        x86_topology[i++] = (struct sched_domain_topology_level){
index b70670a..77a9316 100644 (file)
@@ -186,6 +186,19 @@ EXPORT_SYMBOL_GPL(arch_static_call_transform);
  */
 bool __static_call_fixup(void *tramp, u8 op, void *dest)
 {
+       unsigned long addr = (unsigned long)tramp;
+       /*
+        * Not all .return_sites are a static_call trampoline (most are not).
+        * Check if the 3 bytes after the return are still kernel text, if not,
+        * then this definitely is not a trampoline and we need not worry
+        * further.
+        *
+        * This avoids the memcmp() below tripping over pagefaults etc..
+        */
+       if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) &&
+           !kernel_text_address(addr + 7))
+               return false;
+
        if (memcmp(tramp+5, tramp_ud, 3)) {
                /* Not a trampoline site, not our problem. */
                return false;
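
The added guard is what keeps the memcmp() from faulting: the bytes at
tramp+5 through tramp+7 are the only ones read past the trampoline start,
so they need an extra check only when addr+7 spills into the next page.
A small model of the page-crossing test (PAGE_SHIFT assumed to be 12, as
with 4 KiB pages):

#include <assert.h>

#define PAGE_SHIFT 12

static int crosses_page(unsigned long addr)
{
        return (addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT);
}

int main(void)
{
        assert(!crosses_page(0x1000));  /* 0x1000..0x1007 stay in one page */
        assert(crosses_page(0x1ffa));   /* 0x1ffa..0x2001 span two pages */
        return 0;
}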
index 58b1f20..4a817d2 100644 (file)
@@ -697,9 +697,10 @@ static bool try_fixup_enqcmd_gp(void)
 }
 
 static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
-                                   unsigned long error_code, const char *str)
+                                   unsigned long error_code, const char *str,
+                                   unsigned long address)
 {
-       if (fixup_exception(regs, trapnr, error_code, 0))
+       if (fixup_exception(regs, trapnr, error_code, address))
                return true;
 
        current->thread.error_code = error_code;
@@ -759,7 +760,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
                goto exit;
        }
 
-       if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
+       if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
                goto exit;
 
        if (error_code)
@@ -1357,17 +1358,20 @@ DEFINE_IDTENTRY(exc_device_not_available)
 
 #define VE_FAULT_STR "VE fault"
 
-static void ve_raise_fault(struct pt_regs *regs, long error_code)
+static void ve_raise_fault(struct pt_regs *regs, long error_code,
+                          unsigned long address)
 {
        if (user_mode(regs)) {
                gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
                return;
        }
 
-       if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
+       if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code,
+                                   VE_FAULT_STR, address)) {
                return;
+       }
 
-       die_addr(VE_FAULT_STR, regs, error_code, 0);
+       die_addr(VE_FAULT_STR, regs, error_code, address);
 }
 
 /*
@@ -1431,7 +1435,7 @@ DEFINE_IDTENTRY(exc_virtualization_exception)
         * it successfully, treat it as #GP(0) and handle it.
         */
        if (!tdx_handle_virt_exception(regs, &ve))
-               ve_raise_fault(regs, 0);
+               ve_raise_fault(regs, 0, ve.gla);
 
        cond_local_irq_disable(regs);
 }
index 3425c6a..15f97c0 100644 (file)
@@ -1258,7 +1258,7 @@ static void __init check_system_tsc_reliable(void)
        if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
            boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
            boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
-           nr_online_nodes <= 2)
+           nr_online_nodes <= 4)
                tsc_disable_clocksource_watchdog();
 }
 
index 03c885d..83d41c2 100644 (file)
@@ -133,14 +133,26 @@ SECTIONS
                KPROBES_TEXT
                SOFTIRQENTRY_TEXT
 #ifdef CONFIG_RETPOLINE
-               __indirect_thunk_start = .;
-               *(.text.__x86.*)
-               __indirect_thunk_end = .;
+               *(.text..__x86.indirect_thunk)
+               *(.text..__x86.return_thunk)
 #endif
                STATIC_CALL_TEXT
 
                ALIGN_ENTRY_TEXT_BEGIN
+#ifdef CONFIG_CPU_SRSO
+               *(.text..__x86.rethunk_untrain)
+#endif
+
                ENTRY_TEXT
+
+#ifdef CONFIG_CPU_SRSO
+               /*
+                * See the comment above srso_alias_untrain_ret()'s
+                * definition.
+                */
+               . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+               *(.text..__x86.rethunk_safe)
+#endif
                ALIGN_ENTRY_TEXT_END
                *(.gnu.warning)
 
@@ -509,7 +521,24 @@ INIT_PER_CPU(irq_stack_backing_store);
 #endif
 
 #ifdef CONFIG_RETHUNK
-. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
+. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+#endif
+
+#ifdef CONFIG_CPU_SRSO
+/*
+ * GNU ld cannot do XOR until 2.41.
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
+ *
+ * LLVM lld cannot do XOR until lld-17.
+ * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb
+ *
+ * Instead do: (A | B) - (A & B) in order to compute the XOR
+ * of the two function addresses:
+ */
+. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
+               (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+               "SRSO function pair won't alias");
 #endif
 
 #endif /* CONFIG_X86_64 */
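
The linker-script workaround leans on the identity (A | B) - (A & B) == A ^ B
for unsigned integers: OR collects every bit set in either operand, AND is
exactly the bits set in both, and subtracting the latter leaves the bits set
in exactly one operand, with no borrows since A & B is a submask of A | B.
A one-assert check, reusing the SRSO alias mask from the assert above:

#include <assert.h>

int main(void)
{
        /* 0x104104 == (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20) */
        unsigned int a = 0x00104104, b = 0x00004100;

        assert(((a | b) - (a & b)) == (a ^ b));
        return 0;
}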
index 7f4d133..d343268 100644 (file)
@@ -729,6 +729,9 @@ void kvm_set_cpu_caps(void)
                F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
        );
 
+       if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
+               kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
+
        kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
                F(PERFMON_V2)
        );
index 113ca96..a983a16 100644 (file)
@@ -637,16 +637,22 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
        *max_irr = -1;
 
        for (i = vec = 0; i <= 7; i++, vec += 32) {
+               u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
+
+               irr_val = *p_irr;
                pir_val = READ_ONCE(pir[i]);
-               irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
+
                if (pir_val) {
+                       pir_val = xchg(&pir[i], 0);
+
                        prev_irr_val = irr_val;
-                       irr_val |= xchg(&pir[i], 0);
-                       *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
-                       if (prev_irr_val != irr_val) {
-                               max_updated_irr =
-                                       __fls(irr_val ^ prev_irr_val) + vec;
-                       }
+                       do {
+                               irr_val = prev_irr_val | pir_val;
+                       } while (prev_irr_val != irr_val &&
+                                !try_cmpxchg(p_irr, &prev_irr_val, irr_val));
+
+                       if (prev_irr_val != irr_val)
+                               max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
                }
                if (irr_val)
                        *max_irr = __fls(irr_val) + vec;
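
The rewritten merge is the standard lock-free read-modify-write loop: compute
the merged value, skip the write when nothing changed, and otherwise retry
the compare-and-exchange, which refreshes the expected value on failure. The
same pattern in portable C11 atomics (a sketch, not the KVM code):

#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

static void merge_bits(_Atomic uint32_t *irr, uint32_t pending)
{
        uint32_t prev = atomic_load(irr);
        uint32_t merged;

        do {
                merged = prev | pending;
                /*
                 * A failed CAS reloads 'prev', so the next pass recomputes
                 * 'merged' against the latest contents.
                 */
        } while (prev != merged &&
                 !atomic_compare_exchange_weak(irr, &prev, merged));
}

int main(void)
{
        _Atomic uint32_t irr = 0x5;

        merge_bits(&irr, 0x3);
        assert(atomic_load(&irr) == 0x7);
        return 0;
}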
@@ -660,8 +666,11 @@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
 {
        struct kvm_lapic *apic = vcpu->arch.apic;
+       bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
 
-       return __kvm_apic_update_irr(pir, apic->regs, max_irr);
+       if (unlikely(!apic->apicv_active && irr_updated))
+               apic->irr_pending = true;
+       return irr_updated;
 }
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 
index 07756b7..d3aec1f 100644 (file)
@@ -2417,15 +2417,18 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
         */
        memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
 
-       vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
-       vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+       BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap));
+       memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap));
 
-       svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+       vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
+       vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb);
 
-       if (ghcb_xcr0_is_valid(ghcb)) {
+       svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb);
+
+       if (kvm_ghcb_xcr0_is_valid(svm)) {
                vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
                kvm_update_cpuid_runtime(vcpu);
        }
@@ -2436,84 +2439,88 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
        control->exit_code_hi = upper_32_bits(exit_code);
        control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
        control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+       svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);
 
        /* Clear the valid entries fields */
        memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
 }
 
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+       return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
 static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
 {
-       struct kvm_vcpu *vcpu;
-       struct ghcb *ghcb;
+       struct vmcb_control_area *control = &svm->vmcb->control;
+       struct kvm_vcpu *vcpu = &svm->vcpu;
        u64 exit_code;
        u64 reason;
 
-       ghcb = svm->sev_es.ghcb;
-
        /*
         * Retrieve the exit code now even though it may not be marked valid
         * as it could help with debugging.
         */
-       exit_code = ghcb_get_sw_exit_code(ghcb);
+       exit_code = kvm_ghcb_get_sw_exit_code(control);
 
        /* Only GHCB Usage code 0 is supported */
-       if (ghcb->ghcb_usage) {
+       if (svm->sev_es.ghcb->ghcb_usage) {
                reason = GHCB_ERR_INVALID_USAGE;
                goto vmgexit_err;
        }
 
        reason = GHCB_ERR_MISSING_INPUT;
 
-       if (!ghcb_sw_exit_code_is_valid(ghcb) ||
-           !ghcb_sw_exit_info_1_is_valid(ghcb) ||
-           !ghcb_sw_exit_info_2_is_valid(ghcb))
+       if (!kvm_ghcb_sw_exit_code_is_valid(svm) ||
+           !kvm_ghcb_sw_exit_info_1_is_valid(svm) ||
+           !kvm_ghcb_sw_exit_info_2_is_valid(svm))
                goto vmgexit_err;
 
-       switch (ghcb_get_sw_exit_code(ghcb)) {
+       switch (exit_code) {
        case SVM_EXIT_READ_DR7:
                break;
        case SVM_EXIT_WRITE_DR7:
-               if (!ghcb_rax_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_RDTSC:
                break;
        case SVM_EXIT_RDPMC:
-               if (!ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_CPUID:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
-               if (ghcb_get_rax(ghcb) == 0xd)
-                       if (!ghcb_xcr0_is_valid(ghcb))
+               if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd)
+                       if (!kvm_ghcb_xcr0_is_valid(svm))
                                goto vmgexit_err;
                break;
        case SVM_EXIT_INVD:
                break;
        case SVM_EXIT_IOIO:
-               if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
-                       if (!ghcb_sw_scratch_is_valid(ghcb))
+               if (control->exit_info_1 & SVM_IOIO_STR_MASK) {
+                       if (!kvm_ghcb_sw_scratch_is_valid(svm))
                                goto vmgexit_err;
                } else {
-                       if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
-                               if (!ghcb_rax_is_valid(ghcb))
+                       if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK))
+                               if (!kvm_ghcb_rax_is_valid(svm))
                                        goto vmgexit_err;
                }
                break;
        case SVM_EXIT_MSR:
-               if (!ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
-               if (ghcb_get_sw_exit_info_1(ghcb)) {
-                       if (!ghcb_rax_is_valid(ghcb) ||
-                           !ghcb_rdx_is_valid(ghcb))
+               if (control->exit_info_1) {
+                       if (!kvm_ghcb_rax_is_valid(svm) ||
+                           !kvm_ghcb_rdx_is_valid(svm))
                                goto vmgexit_err;
                }
                break;
        case SVM_EXIT_VMMCALL:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_cpl_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_cpl_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_RDTSCP:
@@ -2521,19 +2528,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        case SVM_EXIT_WBINVD:
                break;
        case SVM_EXIT_MONITOR:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb) ||
-                   !ghcb_rdx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm) ||
+                   !kvm_ghcb_rdx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_EXIT_MWAIT:
-               if (!ghcb_rax_is_valid(ghcb) ||
-                   !ghcb_rcx_is_valid(ghcb))
+               if (!kvm_ghcb_rax_is_valid(svm) ||
+                   !kvm_ghcb_rcx_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_VMGEXIT_MMIO_READ:
        case SVM_VMGEXIT_MMIO_WRITE:
-               if (!ghcb_sw_scratch_is_valid(ghcb))
+               if (!kvm_ghcb_sw_scratch_is_valid(svm))
                        goto vmgexit_err;
                break;
        case SVM_VMGEXIT_NMI_COMPLETE:
@@ -2549,11 +2556,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
        return 0;
 
 vmgexit_err:
-       vcpu = &svm->vcpu;
-
        if (reason == GHCB_ERR_INVALID_USAGE) {
                vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
-                           ghcb->ghcb_usage);
+                           svm->sev_es.ghcb->ghcb_usage);
        } else if (reason == GHCB_ERR_INVALID_EVENT) {
                vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
                            exit_code);
@@ -2563,11 +2568,8 @@ vmgexit_err:
                dump_ghcb(svm);
        }
 
-       /* Clear the valid entries fields */
-       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-
-       ghcb_set_sw_exit_info_1(ghcb, 2);
-       ghcb_set_sw_exit_info_2(ghcb, reason);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason);
 
        /* Resume the guest to "return" the error code. */
        return 1;
@@ -2586,7 +2588,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
                 */
                if (svm->sev_es.ghcb_sa_sync) {
                        kvm_write_guest(svm->vcpu.kvm,
-                                       ghcb_get_sw_scratch(svm->sev_es.ghcb),
+                                       svm->sev_es.sw_scratch,
                                        svm->sev_es.ghcb_sa,
                                        svm->sev_es.ghcb_sa_len);
                        svm->sev_es.ghcb_sa_sync = false;
@@ -2632,12 +2634,11 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
 static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 {
        struct vmcb_control_area *control = &svm->vmcb->control;
-       struct ghcb *ghcb = svm->sev_es.ghcb;
        u64 ghcb_scratch_beg, ghcb_scratch_end;
        u64 scratch_gpa_beg, scratch_gpa_end;
        void *scratch_va;
 
-       scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+       scratch_gpa_beg = svm->sev_es.sw_scratch;
        if (!scratch_gpa_beg) {
                pr_err("vmgexit: scratch gpa not provided\n");
                goto e_scratch;
@@ -2708,8 +2709,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
        return 0;
 
 e_scratch:
-       ghcb_set_sw_exit_info_1(ghcb, 2);
-       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
 
        return 1;
 }
@@ -2822,7 +2823,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct vmcb_control_area *control = &svm->vmcb->control;
        u64 ghcb_gpa, exit_code;
-       struct ghcb *ghcb;
        int ret;
 
        /* Validate the GHCB */
@@ -2847,20 +2847,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
        }
 
        svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
-       ghcb = svm->sev_es.ghcb_map.hva;
 
-       trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
-
-       exit_code = ghcb_get_sw_exit_code(ghcb);
+       trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb);
 
+       sev_es_sync_from_ghcb(svm);
        ret = sev_es_validate_vmgexit(svm);
        if (ret)
                return ret;
 
-       sev_es_sync_from_ghcb(svm);
-       ghcb_set_sw_exit_info_1(ghcb, 0);
-       ghcb_set_sw_exit_info_2(ghcb, 0);
+       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0);
+       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0);
 
+       exit_code = kvm_ghcb_get_sw_exit_code(control);
        switch (exit_code) {
        case SVM_VMGEXIT_MMIO_READ:
                ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
@@ -2898,13 +2896,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
                        break;
                case 1:
                        /* Get AP jump table address */
-                       ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+                       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table);
                        break;
                default:
                        pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
                               control->exit_info_1);
-                       ghcb_set_sw_exit_info_1(ghcb, 2);
-                       ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
+                       ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+                       ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
                }
 
                ret = 1;
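
The sev.c changes above make KVM snapshot the GHCB into its own storage once per VMGEXIT: sev_es_sync_from_ghcb() now runs before sev_es_validate_vmgexit(), and validation reads vcpu->arch.regs, control->exit_info_* and svm->sev_es.sw_scratch instead of re-reading the guest-writable GHCB page. An abridged sketch of what the sync step is assumed to look like, using the accessors added in svm.h below (the full field list is not part of this excerpt):

static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->sev_es.ghcb;

	/* Snapshot the valid bitmap first; the accessors below test it. */
	memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap,
	       sizeof(ghcb->save.valid_bitmap));

	/* Capture the GPRs and the scratch GPA that KVM consumes. */
	vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
	vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
	vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
	svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);

	/* Scrub the guest copy so stale valid bits can't be replayed. */
	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
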
index d381ad4..d4bfdc6 100644 (file)
@@ -1498,7 +1498,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
        if (sd->current_vmcb != svm->vmcb) {
                sd->current_vmcb = svm->vmcb;
-               indirect_branch_prediction_barrier();
+
+               if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
+                       indirect_branch_prediction_barrier();
        }
        if (kvm_vcpu_apicv_active(vcpu))
                avic_vcpu_load(vcpu, cpu);
@@ -1786,6 +1788,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
        }
 }
 
+static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+       return true;
+}
+
 void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -3986,14 +3993,8 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
-       struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
-
-       /*
-        * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
-        * can't read guest memory (dereference memslots) to decode the WRMSR.
-        */
-       if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
-           nrips && control->next_rip)
+       if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+           to_svm(vcpu)->vmcb->control.exit_info_1)
                return handle_fastpath_set_msr_irqoff(vcpu);
 
        return EXIT_FASTPATH_NONE;
@@ -4005,6 +4006,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
 
        guest_state_enter_irqoff();
 
+       amd_clear_divider();
+
        if (sev_es_guest(vcpu->kvm))
                __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
        else
@@ -4815,6 +4818,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
        .set_segment = svm_set_segment,
        .get_cpl = svm_get_cpl,
        .get_cs_db_l_bits = svm_get_cs_db_l_bits,
+       .is_valid_cr0 = svm_is_valid_cr0,
        .set_cr0 = svm_set_cr0,
        .post_set_cr3 = sev_post_set_cr3,
        .is_valid_cr4 = svm_is_valid_cr4,
index 18af7e7..8239c8d 100644 (file)
@@ -190,10 +190,12 @@ struct vcpu_sev_es_state {
        /* SEV-ES support */
        struct sev_es_save_area *vmsa;
        struct ghcb *ghcb;
+       u8 valid_bitmap[16];
        struct kvm_host_map ghcb_map;
        bool received_first_sipi;
 
        /* SEV-ES scratch area support */
+       u64 sw_scratch;
        void *ghcb_sa;
        u32 ghcb_sa_len;
        bool ghcb_sa_sync;
@@ -744,4 +746,28 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
 
+#define DEFINE_KVM_GHCB_ACCESSORS(field)                                               \
+       static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
+       {                                                                       \
+               return test_bit(GHCB_BITMAP_IDX(field),                         \
+                               (unsigned long *)&svm->sev_es.valid_bitmap);    \
+       }                                                                       \
+                                                                               \
+       static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \
+       {                                                                       \
+               return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \
+       }                                                                       \
+
+DEFINE_KVM_GHCB_ACCESSORS(cpl)
+DEFINE_KVM_GHCB_ACCESSORS(rax)
+DEFINE_KVM_GHCB_ACCESSORS(rcx)
+DEFINE_KVM_GHCB_ACCESSORS(rdx)
+DEFINE_KVM_GHCB_ACCESSORS(rbx)
+DEFINE_KVM_GHCB_ACCESSORS(rsi)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
+DEFINE_KVM_GHCB_ACCESSORS(xcr0)
+
 #endif
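
For reference, for the rax field the DEFINE_KVM_GHCB_ACCESSORS() macro above expands to the following pair of helpers (GHCB_BITMAP_IDX() is assumed to map a GHCB save-area field name to its bit index in the valid bitmap):

static __always_inline bool kvm_ghcb_rax_is_valid(const struct vcpu_svm *svm)
{
	return test_bit(GHCB_BITMAP_IDX(rax),
			(unsigned long *)&svm->sev_es.valid_bitmap);
}

static __always_inline u64 kvm_ghcb_get_rax_if_valid(struct vcpu_svm *svm,
						     struct ghcb *ghcb)
{
	return kvm_ghcb_rax_is_valid(svm) ? ghcb->save.rax : 0;
}

Note that the _is_valid() helpers test KVM's cached bitmap, not the guest-mapped GHCB, which is what makes the validation in sev.c above immune to the guest flipping valid bits after the snapshot.
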
index 8e8295e..ef2ebab 100644 (file)
@@ -222,7 +222,7 @@ SYM_FUNC_START(__svm_vcpu_run)
         * because interrupt handlers won't sanitize 'ret' if the return is
         * from the kernel.
         */
-       UNTRAIN_RET
+       UNTRAIN_RET_VM
 
        /*
         * Clear all general purpose registers except RSP and RAX to prevent
@@ -359,7 +359,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
         * because interrupt handlers won't sanitize RET if the return is
         * from the kernel.
         */
-       UNTRAIN_RET
+       UNTRAIN_RET_VM
 
        /* "Pop" @spec_ctrl_intercepted.  */
        pop %_ASM_BX
index 07e927d..be275a0 100644 (file)
@@ -303,10 +303,8 @@ SYM_FUNC_START(vmx_do_nmi_irqoff)
        VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
 SYM_FUNC_END(vmx_do_nmi_irqoff)
 
-
-.section .text, "ax"
-
 #ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
 /**
  * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
  * @field:     VMCS field encoding that failed
@@ -335,7 +333,7 @@ SYM_FUNC_START(vmread_error_trampoline)
        mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
        mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
 
-       call vmread_error
+       call vmread_error_trampoline2
 
        /* Zero out @fault, which will be popped into the result register. */
        _ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
@@ -357,6 +355,8 @@ SYM_FUNC_START(vmread_error_trampoline)
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
 
+.section .text, "ax"
+
 SYM_FUNC_START(vmx_do_interrupt_irqoff)
        VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
 SYM_FUNC_END(vmx_do_interrupt_irqoff)
index 0ecf4be..df461f3 100644 (file)
@@ -441,13 +441,23 @@ do {                                      \
        pr_warn_ratelimited(fmt);       \
 } while (0)
 
-void vmread_error(unsigned long field, bool fault)
+noinline void vmread_error(unsigned long field)
 {
-       if (fault)
+       vmx_insn_failed("vmread failed: field=%lx\n", field);
+}
+
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
+{
+       if (fault) {
                kvm_spurious_fault();
-       else
-               vmx_insn_failed("vmread failed: field=%lx\n", field);
+       } else {
+               instrumentation_begin();
+               vmread_error(field);
+               instrumentation_end();
+       }
 }
+#endif
 
 noinline void vmwrite_error(unsigned long field, unsigned long value)
 {
@@ -1503,6 +1513,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        unsigned long old_rflags;
 
+       /*
+        * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
+        * is an unrestricted guest in order to mark L2 as needing emulation
+        * if L1 runs L2 as a restricted guest.
+        */
        if (is_unrestricted_guest(vcpu)) {
                kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
                vmx->rflags = rflags;
@@ -3037,6 +3052,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
        struct vcpu_vmx *vmx = to_vmx(vcpu);
        struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
 
+       /*
+        * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+        * as using VM86 is unnecessary if unrestricted guest is enabled, and
+        * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+        * should VM-Fail and KVM should reject userspace attempts to stuff
+        * CR0.PG=0 when L2 is active.
+        */
+       WARN_ON_ONCE(is_guest_mode(vcpu));
+
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
        vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
@@ -3226,6 +3250,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
 #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
                          CPU_BASED_CR3_STORE_EXITING)
 
+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+       if (is_guest_mode(vcpu))
+               return nested_guest_cr0_valid(vcpu, cr0);
+
+       if (to_vmx(vcpu)->nested.vmxon)
+               return nested_host_cr0_valid(vcpu, cr0);
+
+       return true;
+}
+
 void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3235,7 +3270,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
 
        hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
-       if (is_unrestricted_guest(vcpu))
+       if (enable_unrestricted_guest)
                hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
        else {
                hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
@@ -3263,7 +3298,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
        }
 #endif
 
-       if (enable_ept && !is_unrestricted_guest(vcpu)) {
+       if (enable_ept && !enable_unrestricted_guest) {
                /*
                 * Ensure KVM has an up-to-date snapshot of the guest's CR3.  If
                 * the below code _enables_ CR3 exiting, vmx_cache_reg() will
@@ -3394,7 +3429,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
         * this bit, even if host CR4.MCE == 0.
         */
        hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
-       if (is_unrestricted_guest(vcpu))
+       if (enable_unrestricted_guest)
                hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
        else if (vmx->rmode.vm86_active)
                hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
@@ -3414,7 +3449,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
        vcpu->arch.cr4 = cr4;
        kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
 
-       if (!is_unrestricted_guest(vcpu)) {
+       if (!enable_unrestricted_guest) {
                if (enable_ept) {
                        if (!is_paging(vcpu)) {
                                hw_cr4 &= ~X86_CR4_PAE;
@@ -4651,7 +4686,8 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
        if (kvm_vmx->pid_table)
                return 0;
 
-       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
+       pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+                           vmx_get_pid_table_order(kvm));
        if (!pages)
                return -ENOMEM;
 
@@ -5364,18 +5400,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
                val = (val & ~vmcs12->cr0_guest_host_mask) |
                        (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
 
-               if (!nested_guest_cr0_valid(vcpu, val))
-                       return 1;
-
                if (kvm_set_cr0(vcpu, val))
                        return 1;
                vmcs_writel(CR0_READ_SHADOW, orig_val);
                return 0;
        } else {
-               if (to_vmx(vcpu)->nested.vmxon &&
-                   !nested_host_cr0_valid(vcpu, val))
-                       return 1;
-
                return kvm_set_cr0(vcpu, val);
        }
 }
@@ -8203,6 +8232,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .set_segment = vmx_set_segment,
        .get_cpl = vmx_get_cpl,
        .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+       .is_valid_cr0 = vmx_is_valid_cr0,
        .set_cr0 = vmx_set_cr0,
        .is_valid_cr4 = vmx_is_valid_cr4,
        .set_cr4 = vmx_set_cr4,
index ce47dc2..33af7b4 100644 (file)
@@ -10,7 +10,7 @@
 #include "vmcs.h"
 #include "../x86.h"
 
-void vmread_error(unsigned long field, bool fault);
+void vmread_error(unsigned long field);
 void vmwrite_error(unsigned long field, unsigned long value);
 void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
 void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
@@ -31,6 +31,13 @@ void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
  * void vmread_error_trampoline(unsigned long field, bool fault);
  */
 extern unsigned long vmread_error_trampoline;
+
+/*
+ * The second VMREAD error trampoline, called from the assembly trampoline,
+ * exists primarily to enable instrumentation for the VM-Fail path.
+ */
+void vmread_error_trampoline2(unsigned long field, bool fault);
+
 #endif
 
 static __always_inline void vmcs_check16(unsigned long field)
@@ -101,8 +108,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
 
 do_fail:
        instrumentation_begin();
-       WARN_ONCE(1, KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
-       pr_warn_ratelimited(KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
+       vmread_error(field);
        instrumentation_end();
        return 0;
 
index a6b9bea..c381770 100644 (file)
@@ -906,6 +906,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
 }
 EXPORT_SYMBOL_GPL(load_pdptrs);
 
+static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+#ifdef CONFIG_X86_64
+       if (cr0 & 0xffffffff00000000UL)
+               return false;
+#endif
+
+       if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+               return false;
+
+       if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+               return false;
+
+       return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
+}
+
 void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
 {
        /*
@@ -952,20 +968,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 {
        unsigned long old_cr0 = kvm_read_cr0(vcpu);
 
-       cr0 |= X86_CR0_ET;
-
-#ifdef CONFIG_X86_64
-       if (cr0 & 0xffffffff00000000UL)
+       if (!kvm_is_valid_cr0(vcpu, cr0))
                return 1;
-#endif
-
-       cr0 &= ~CR0_RESERVED_BITS;
 
-       if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
-               return 1;
+       cr0 |= X86_CR0_ET;
 
-       if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
-               return 1;
+       /* Write to CR0 reserved bits are ignored, even on Intel. */
+       cr0 &= ~CR0_RESERVED_BITS;
 
 #ifdef CONFIG_X86_64
        if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
@@ -1607,7 +1616,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
         ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
-        ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+        ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {
@@ -1664,6 +1673,9 @@ static u64 kvm_get_arch_capabilities(void)
                 */
        }
 
+       if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
+               data |= ARCH_CAP_GDS_NO;
+
        return data;
 }
 
@@ -2172,6 +2184,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
        u64 data;
        fastpath_t ret = EXIT_FASTPATH_NONE;
 
+       kvm_vcpu_srcu_read_lock(vcpu);
+
        switch (msr) {
        case APIC_BASE_MSR + (APIC_ICR >> 4):
                data = kvm_read_edx_eax(vcpu);
@@ -2194,6 +2208,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
        if (ret != EXIT_FASTPATH_NONE)
                trace_kvm_msr_write(msr, data);
 
+       kvm_vcpu_srcu_read_unlock(vcpu);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
@@ -10203,9 +10219,13 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
                if (r < 0)
                        goto out;
                if (r) {
-                       kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
-                       static_call(kvm_x86_inject_irq)(vcpu, false);
-                       WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+                       int irq = kvm_cpu_get_interrupt(vcpu);
+
+                       if (!WARN_ON_ONCE(irq == -1)) {
+                               kvm_queue_interrupt(vcpu, irq, false);
+                               static_call(kvm_x86_inject_irq)(vcpu, false);
+                               WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+                       }
                }
                if (kvm_cpu_has_injectable_intr(vcpu))
                        static_call(kvm_x86_enable_irq_window)(vcpu);
@@ -11460,7 +11480,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                        return false;
        }
 
-       return kvm_is_valid_cr4(vcpu, sregs->cr4);
+       return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
+              kvm_is_valid_cr0(vcpu, sregs->cr0);
 }
 
 static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
@@ -13185,7 +13206,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
 
 bool kvm_arch_has_irq_bypass(void)
 {
-       return true;
+       return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
 }
 
 int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
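
kvm_is_valid_cr0() above centralizes the architectural CR0 checks that kvm_set_cr0() used to open-code, and kvm_is_valid_sregs() now applies them to userspace-supplied state as well. For illustration, a user-space mirror of just the architectural portion (the real helper additionally defers to the vendor hook through static_call()):

#include <stdbool.h>
#include <stdint.h>

#define X86_CR0_PE (1ULL << 0)
#define X86_CR0_NW (1ULL << 29)
#define X86_CR0_CD (1ULL << 30)
#define X86_CR0_PG (1ULL << 31)

static bool cr0_arch_valid(uint64_t cr0)
{
	if (cr0 >> 32)		/* bits 63:32 are reserved (64-bit hosts) */
		return false;
	if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))	/* NW requires CD */
		return false;
	if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))	/* PG requires PE */
		return false;
	return true;
}

/* cr0_arch_valid(X86_CR0_PE | X86_CR0_PG) -> true
 * cr0_arch_valid(X86_CR0_PG)              -> false, paging without PE
 * cr0_arch_valid(X86_CR0_NW)              -> false, NW without CD */
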
index 3fd066d..cd86aeb 100644 (file)
@@ -11,8 +11,9 @@
 #include <asm/unwind_hints.h>
 #include <asm/percpu.h>
 #include <asm/frame.h>
+#include <asm/nops.h>
 
-       .section .text.__x86.indirect_thunk
+       .section .text..__x86.indirect_thunk
 
 
 .macro POLINE reg
@@ -131,36 +132,107 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
  */
 #ifdef CONFIG_RETHUNK
 
-       .section .text.__x86.return_thunk
+/*
+ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
+ * special addresses:
+ *
+ * - srso_alias_untrain_ret() is 2M aligned
+ * - srso_alias_safe_ret() is also in the same 2M page, but bits 2, 8, 14
+ * and 20 of its virtual address are set (while the same bits of
+ * srso_alias_untrain_ret()'s address are cleared).
+ *
+ * This guarantees that those two addresses will alias in the branch
+ * target buffer of Zen3/4 generations, causing any potentially
+ * poisoned entries at that BTB slot to be evicted.
+ *
+ * As a result, srso_alias_safe_ret() becomes a safe return.
+ */
+#ifdef CONFIG_CPU_SRSO
+       .section .text..__x86.rethunk_untrain
+
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       ASM_NOP2
+       lfence
+       jmp srso_alias_return_thunk
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
+
+       .section .text..__x86.rethunk_safe
+#else
+/* dummy definition for alternatives */
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+#endif
+
+SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       lea 8(%_ASM_SP), %_ASM_SP
+       UNWIND_HINT_FUNC
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_FUNC_END(srso_alias_safe_ret)
+
+       .section .text..__x86.return_thunk
+
+SYM_CODE_START(srso_alias_return_thunk)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       call srso_alias_safe_ret
+       ud2
+SYM_CODE_END(srso_alias_return_thunk)
+
+/*
+ * Some generic notes on the untraining sequences:
+ *
+ * They are interchangeable when it comes to flushing potentially wrong
+ * RET predictions from the BTB.
+ *
+ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
+ * Retbleed sequence because the return sequence done there
+ * (srso_safe_ret()) is longer and the return sequence must fully nest
+ * (end before) the untraining sequence. Therefore, the untraining
+ * sequence must fully overlap the return sequence.
+ *
+ * Regarding alignment - the instructions which need to be untrained,
+ * must all start at a cacheline boundary for Zen1/2 generations. That
+ * is, instruction sequences starting at srso_safe_ret() and
+ * the respective instruction sequences at retbleed_return_thunk()
+ * must start at a cacheline boundary.
+ */
 
 /*
  * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
  *    alignment within the BTB.
- * 2) The instruction at zen_untrain_ret must contain, and not
+ * 2) The instruction at retbleed_untrain_ret must contain, and not
  *    end with, the 0xc3 byte of the RET.
  * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
  */
        .align 64
-       .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
-SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
+SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
        ANNOTATE_NOENDBR
        /*
-        * As executed from zen_untrain_ret, this is:
+        * As executed from retbleed_untrain_ret, this is:
         *
         *   TEST $0xcc, %bl
         *   LFENCE
-        *   JMP __x86_return_thunk
+        *   JMP retbleed_return_thunk
         *
         * Executing the TEST instruction has a side effect of evicting any BTB
         * prediction (potentially attacker controlled) attached to the RET, as
-        * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+        * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
         */
        .byte   0xf6
 
        /*
-        * As executed from __x86_return_thunk, this is a plain RET.
+        * As executed from retbleed_return_thunk, this is a plain RET.
         *
         * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
         *
@@ -172,13 +244,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
         * With SMT enabled and STIBP active, a sibling thread cannot poison
         * RET's prediction to a type of its choice, but can evict the
         * prediction due to competitive sharing. If the prediction is
-        * evicted, __x86_return_thunk will suffer Straight Line Speculation
+        * evicted, retbleed_return_thunk will suffer Straight Line Speculation
         * which will be contained safely by the INT3.
         */
-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
        ret
        int3
-SYM_CODE_END(__x86_return_thunk)
+SYM_CODE_END(retbleed_return_thunk)
 
        /*
         * Ensure the TEST decoding / BTB invalidation is complete.
@@ -189,11 +261,67 @@ SYM_CODE_END(__x86_return_thunk)
         * Jump back and execute the RET in the middle of the TEST instruction.
         * INT3 is for SLS protection.
         */
-       jmp __x86_return_thunk
+       jmp retbleed_return_thunk
        int3
-SYM_FUNC_END(zen_untrain_ret)
-__EXPORT_THUNK(zen_untrain_ret)
+SYM_FUNC_END(retbleed_untrain_ret)
+__EXPORT_THUNK(retbleed_untrain_ret)
 
+/*
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed, which is a
+ *
+ * movabs $0xccccc30824648d48,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+ * thus a "safe" one to use.
+ */
+       .align 64
+       .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+       ANNOTATE_NOENDBR
+       .byte 0x48, 0xb8
+
+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below).  This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+       lea 8(%_ASM_SP), %_ASM_SP
+       ret
+       int3
+       int3
+       /* end of movabs */
+       lfence
+       call srso_safe_ret
+       ud2
+SYM_CODE_END(srso_safe_ret)
+SYM_FUNC_END(srso_untrain_ret)
+__EXPORT_THUNK(srso_untrain_ret)
+
+SYM_CODE_START(srso_return_thunk)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       call srso_safe_ret
+       ud2
+SYM_CODE_END(srso_return_thunk)
+
+SYM_FUNC_START(entry_untrain_ret)
+       ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
+                     "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
+                     "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+SYM_FUNC_END(entry_untrain_ret)
+__EXPORT_THUNK(entry_untrain_ret)
+
+SYM_CODE_START(__x86_return_thunk)
+       UNWIND_HINT_FUNC
+       ANNOTATE_NOENDBR
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
+SYM_CODE_END(__x86_return_thunk)
 EXPORT_SYMBOL(__x86_return_thunk)
 
 #endif /* CONFIG_RETHUNK */
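
The aliasing property that the srso_alias comments above rely on is established at link time; the placement itself lives in the linker script and is not part of this excerpt. Expressed as a C predicate for concreteness (a sketch; SRSO_ALIAS_BITS is a name invented for this illustration):

#define SRSO_ALIAS_BITS ((1UL << 2) | (1UL << 8) | (1UL << 14) | (1UL << 20))

/* True when the two thunks collide in the Zen3/4 BTB: the untrain
 * address has bits 2/8/14/20 clear and the safe address has them set.
 * Since all four bits lie below bit 21, the 2M page is unchanged. */
static int srso_alias_placement_ok(unsigned long untrain, unsigned long safe)
{
	return !(untrain & SRSO_ALIAS_BITS) &&
	       safe == (untrain | SRSO_ALIAS_BITS);
}
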
index 8192452..ffa25e9 100644 (file)
@@ -20,7 +20,6 @@
 #include <asm/tlb.h>
 #include <asm/proto.h>
 #include <asm/dma.h>           /* for MAX_DMA_PFN */
-#include <asm/microcode.h>
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
@@ -273,7 +272,7 @@ static void __init probe_page_size_mask(void)
 static const struct x86_cpu_id invlpg_miss_ids[] = {
        INTEL_MATCH(INTEL_FAM6_ALDERLAKE   ),
        INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ),
-       INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ),
+       INTEL_MATCH(INTEL_FAM6_ATOM_GRACEMONT ),
        INTEL_MATCH(INTEL_FAM6_RAPTORLAKE  ),
        INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P),
        INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S),
index 54bbd51..6faea41 100644 (file)
@@ -288,11 +288,10 @@ static bool amd_enc_cache_flush_required(void)
        return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
 }
 
-static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
 {
 #ifdef CONFIG_PARAVIRT
-       unsigned long sz = npages << PAGE_SHIFT;
-       unsigned long vaddr_end = vaddr + sz;
+       unsigned long vaddr_end = vaddr + size;
 
        while (vaddr < vaddr_end) {
                int psize, pmask, level;
@@ -342,7 +341,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e
                snp_set_memory_private(vaddr, npages);
 
        if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
-               enc_dec_hypercall(vaddr, npages, enc);
+               enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);
 
        return true;
 }
@@ -466,7 +465,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
 
        ret = 0;
 
-       early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
+       early_set_mem_enc_dec_hypercall(start, size, enc);
 out:
        __flush_tlb_all();
        return ret;
@@ -482,9 +481,9 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
        return early_set_memory_enc_dec(vaddr, size, true);
 }
 
-void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
 {
-       enc_dec_hypercall(vaddr, npages, enc);
+       enc_dec_hypercall(vaddr, size, enc);
 }
 
 void __init sme_early_init(void)
index c69f847..4ef20b4 100644 (file)
@@ -82,7 +82,7 @@ int __init efi_memmap_alloc(unsigned int num_entries,
 
 /**
  * efi_memmap_install - Install a new EFI memory map in efi.memmap
- * @ctx: map allocation parameters (address, size, flags)
+ * @data: efi memmap installation parameters
  *
  * Unlike efi_memmap_init_*(), this function does not allow the caller
  * to switch from early to late mappings. It simply uses the existing
index a60af02..a6ab43f 100644 (file)
@@ -202,21 +202,17 @@ static int param_set_action(const char *val, const struct kernel_param *kp)
 {
        int i;
        int n = ARRAY_SIZE(valid_acts);
-       char arg[ACTION_LEN], *p;
+       char arg[ACTION_LEN];
 
        /* (remove possible '\n') */
-       strncpy(arg, val, ACTION_LEN - 1);
-       arg[ACTION_LEN - 1] = '\0';
-       p = strchr(arg, '\n');
-       if (p)
-               *p = '\0';
+       strscpy(arg, val, strnchrnul(val, sizeof(arg)-1, '\n') - val + 1);
 
        for (i = 0; i < n; i++)
                if (!strcmp(arg, valid_acts[i].action))
                        break;
 
        if (i < n) {
-               strcpy(uv_nmi_action, arg);
+               strscpy(uv_nmi_action, arg, sizeof(uv_nmi_action));
                pr_info("UV: New NMI action:%s\n", uv_nmi_action);
                return 0;
        }
@@ -959,7 +955,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs)
 
                /* Unexpected return, revert action to "dump" */
                if (master)
-                       strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action));
+                       strscpy(uv_nmi_action, "dump", sizeof(uv_nmi_action));
        }
 
        /* Pause as all CPUs enter the NMI handler */
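
The strscpy()/strnchrnul() combination in param_set_action() above copies at most ACTION_LEN - 1 bytes and drops a trailing newline in one expression, replacing the old strncpy()-then-patch sequence. A sketch of the idiom with a worked example (copy_action() is a name made up here):

#include <linux/string.h>

static void copy_action(char *dst, size_t dst_size, const char *val)
{
	/*
	 * strnchrnul() returns a pointer to the first '\n' within the
	 * first dst_size - 1 bytes, or to the end of the scanned region
	 * if there is none, so the size passed to strscpy() covers the
	 * wanted bytes plus room for the terminating NUL.
	 *
	 * Example: val = "dump\n", dst_size = 16
	 *   strnchrnul(val, 15, '\n') == &val[4]
	 *   size = 4 + 1 = 5, so dst becomes "dump" (newline dropped)
	 */
	strscpy(dst, val, strnchrnul(val, dst_size - 1, '\n') - val + 1);
}
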
index 7558139..aea47e7 100644 (file)
@@ -14,6 +14,7 @@
 #include <crypto/sha2.h>
 #include <asm/purgatory.h>
 
+#include "../boot/compressed/error.h"
 #include "../boot/string.h"
 
 u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory");
index 93b6582..27fc170 100644 (file)
@@ -79,7 +79,7 @@
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
 #include <asm/acpi.h>
-#include <acpi/pdc_intel.h>
+#include <acpi/proc_cap_intel.h>
 #include <acpi/processor.h>
 #include <xen/interface/platform.h>
 #endif
@@ -288,17 +288,17 @@ static bool __init xen_check_mwait(void)
 
        native_cpuid(&ax, &bx, &cx, &dx);
 
-       /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
+       /* Ask the Hypervisor whether to clear ACPI_PROC_CAP_C_C2C3_FFH. If so,
         * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
         */
        buf[0] = ACPI_PDC_REVISION_ID;
        buf[1] = 1;
-       buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
+       buf[2] = (ACPI_PROC_CAP_C_CAPABILITY_SMP | ACPI_PROC_CAP_EST_CAPABILITY_SWSMP);
 
        set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
 
        if ((HYPERVISOR_platform_op(&op) == 0) &&
-           (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
+           (buf[2] & (ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH))) {
                cpuid_leaf5_ecx_val = cx;
                cpuid_leaf5_edx_val = dx;
        }
@@ -523,7 +523,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr)
        BUG_ON(size > PAGE_SIZE);
        BUG_ON(va & ~PAGE_MASK);
 
-       pfn = virt_to_pfn(va);
+       pfn = virt_to_pfn((void *)va);
        mfn = pfn_to_mfn(pfn);
 
        pte = pfn_pte(pfn, PAGE_KERNEL_RO);
index e0a9751..eb3bac0 100644 (file)
@@ -2202,13 +2202,13 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
                mcs = __xen_mc_entry(0);
 
                if (in_frames)
-                       in_frames[i] = virt_to_mfn(vaddr);
+                       in_frames[i] = virt_to_mfn((void *)vaddr);
 
                MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
-               __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
+               __set_phys_to_machine(virt_to_pfn((void *)vaddr), INVALID_P2M_ENTRY);
 
                if (out_frames)
-                       out_frames[i] = virt_to_pfn(vaddr);
+                       out_frames[i] = virt_to_pfn((void *)vaddr);
        }
        xen_mc_issue(0);
 }
@@ -2250,7 +2250,7 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
                MULTI_update_va_mapping(mcs.mc, vaddr,
                                mfn_pte(mfn, PAGE_KERNEL), flags);
 
-               set_phys_to_machine(virt_to_pfn(vaddr), mfn);
+               set_phys_to_machine(virt_to_pfn((void *)vaddr), mfn);
        }
 
        xen_mc_issue(0);
@@ -2310,12 +2310,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
        int            success;
        unsigned long vstart = (unsigned long)phys_to_virt(pstart);
 
-       /*
-        * Currently an auto-translated guest will not perform I/O, nor will
-        * it require PAE page directories below 4GB. Therefore any calls to
-        * this function are redundant and can be ignored.
-        */
-
        if (unlikely(order > MAX_CONTIG_ORDER))
                return -ENOMEM;
 
@@ -2327,7 +2321,7 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
        xen_zap_pfn_range(vstart, order, in_frames, NULL);
 
        /* 2. Get a new contiguous memory extent. */
-       out_frame = virt_to_pfn(vstart);
+       out_frame = virt_to_pfn((void *)vstart);
        success = xen_exchange_memory(1UL << order, 0, in_frames,
                                      1, order, &out_frame,
                                      address_bits);
@@ -2360,7 +2354,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
        spin_lock_irqsave(&xen_reservation_lock, flags);
 
        /* 1. Find start MFN of contiguous extent. */
-       in_frame = virt_to_mfn(vstart);
+       in_frame = virt_to_mfn((void *)vstart);
 
        /* 2. Zap current PTEs. */
        xen_zap_pfn_range(vstart, order, NULL, out_frames);
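
The new (void *) casts in the Xen changes follow from virt_to_pfn() and virt_to_mfn() becoming properly typed inline functions that take a pointer rather than an unsigned long. The assumed shape of the updated helper (the header change itself is outside this excerpt):

static inline unsigned long virt_to_pfn(const void *kaddr)
{
	return __pa(kaddr) >> PAGE_SHIFT;
}
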
index 8b5cf7b..50c998b 100644 (file)
@@ -340,7 +340,7 @@ static void __init xen_do_set_identity_and_remap_chunk(
 
        WARN_ON(size == 0);
 
-       mfn_save = virt_to_mfn(buf);
+       mfn_save = virt_to_mfn((void *)buf);
 
        for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
             ident_pfn_iter < ident_end_pfn;
@@ -503,7 +503,7 @@ void __init xen_remap_memory(void)
        unsigned long pfn_s = ~0UL;
        unsigned long len = 0;
 
-       mfn_save = virt_to_mfn(buf);
+       mfn_save = virt_to_mfn((void *)buf);
 
        while (xen_remap_mfn != INVALID_P2M_ENTRY) {
                /* Map the remap information */
index 643d029..a0ea285 100644 (file)
@@ -90,30 +90,35 @@ SYM_CODE_END(xen_cpu_bringup_again)
        ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
        ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz "2.6")
        ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz "xen-3.0")
-#ifdef CONFIG_X86_32
-       ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __PAGE_OFFSET)
-#else
+#ifdef CONFIG_XEN_PV
        ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      _ASM_PTR __START_KERNEL_map)
        /* Map the p2m table to a 512GB-aligned user address. */
        ELFNOTE(Xen, XEN_ELFNOTE_INIT_P2M,       .quad (PUD_SIZE * PTRS_PER_PUD))
-#endif
-#ifdef CONFIG_XEN_PV
        ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          _ASM_PTR startup_xen)
-#endif
-       ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
-       ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,
-               .ascii "!writable_page_tables|pae_pgdir_above_4gb")
-       ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
-               .long (1 << XENFEAT_writable_page_tables) |       \
-                     (1 << XENFEAT_dom0) |                       \
-                     (1 << XENFEAT_linux_rsdp_unrestricted))
+       ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .ascii "!writable_page_tables")
        ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz "yes")
-       ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
        ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,
                .quad _PAGE_PRESENT; .quad _PAGE_PRESENT)
-       ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
        ELFNOTE(Xen, XEN_ELFNOTE_MOD_START_PFN,  .long 1)
-       ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   _ASM_PTR __HYPERVISOR_VIRT_START)
        ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   _ASM_PTR 0)
+# define FEATURES_PV (1 << XENFEAT_writable_page_tables)
+#else
+# define FEATURES_PV 0
+#endif
+#ifdef CONFIG_XEN_PVH
+# define FEATURES_PVH (1 << XENFEAT_linux_rsdp_unrestricted)
+#else
+# define FEATURES_PVH 0
+#endif
+#ifdef CONFIG_XEN_DOM0
+# define FEATURES_DOM0 (1 << XENFEAT_dom0)
+#else
+# define FEATURES_DOM0 0
+#endif
+       ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page)
+       ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES,
+               .long FEATURES_PV | FEATURES_PVH | FEATURES_DOM0)
+       ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz "generic")
+       ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1)
 
 #endif /*CONFIG_XEN */
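
With the restructuring above, the SUPPORTED_FEATURES note value is assembled from per-Kconfig masks rather than one unconditional expression. As a worked example (a sketch), a CONFIG_XEN_PV plus CONFIG_XEN_DOM0 build without CONFIG_XEN_PVH would emit:

/* FEATURES_PV | FEATURES_PVH | FEATURES_DOM0 evaluates to: */
unsigned int supported_features =
	(1 << XENFEAT_writable_page_tables) |	/* FEATURES_PV   */
	0 |					/* FEATURES_PVH  */
	(1 << XENFEAT_dom0);			/* FEATURES_DOM0 */
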
index 20d6b49..ee97edc 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * arch/xtensa/kernel/align.S
  *
- * Handle unalignment exceptions in kernel space.
+ * Handle unalignment and load/store exceptions.
  *
  * This file is subject to the terms and conditions of the GNU General
  * Public License.  See the file "COPYING" in the main directory of
 #define LOAD_EXCEPTION_HANDLER
 #endif
 
-#if XCHAL_UNALIGNED_STORE_EXCEPTION || defined LOAD_EXCEPTION_HANDLER
+#if XCHAL_UNALIGNED_STORE_EXCEPTION || defined CONFIG_XTENSA_LOAD_STORE
+#define STORE_EXCEPTION_HANDLER
+#endif
+
+#if defined LOAD_EXCEPTION_HANDLER || defined STORE_EXCEPTION_HANDLER
 #define ANY_EXCEPTION_HANDLER
 #endif
 
-#if XCHAL_HAVE_WINDOWED
+#if XCHAL_HAVE_WINDOWED && defined CONFIG_MMU
 #define UNALIGNED_USER_EXCEPTION
 #endif
 
-/*  First-level exception handler for unaligned exceptions.
- *
- *  Note: This handler works only for kernel exceptions.  Unaligned user
- *        access should get a seg fault.
- */
-
 /* Big and little endian 16-bit values are located in
  * different halves of a register.  HWORD_START helps to
  * abstract the notion of extracting a 16-bit value from a
@@ -228,8 +226,6 @@ ENDPROC(fast_load_store)
 #ifdef ANY_EXCEPTION_HANDLER
 ENTRY(fast_unaligned)
 
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
-
        call0   .Lsave_and_load_instruction
 
        /* Analyze the instruction (load or store?). */
@@ -244,8 +240,7 @@ ENTRY(fast_unaligned)
        /* 'store indicator bit' not set, jump */
        _bbci.l a4, OP1_SI_BIT + INSN_OP1, .Lload
 
-#endif
-#if XCHAL_UNALIGNED_STORE_EXCEPTION
+#ifdef STORE_EXCEPTION_HANDLER
 
        /* Store: Jump to table entry to get the value in the source register.*/
 
@@ -254,7 +249,7 @@ ENTRY(fast_unaligned)
        addx8   a5, a6, a5
        jx      a5                      # jump into table
 #endif
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION
+#ifdef LOAD_EXCEPTION_HANDLER
 
        /* Load: Load memory address. */
 
@@ -328,7 +323,7 @@ ENTRY(fast_unaligned)
        mov     a14, a3         ;       _j .Lexit;      .align 8
        mov     a15, a3         ;       _j .Lexit;      .align 8
 #endif
-#if XCHAL_UNALIGNED_STORE_EXCEPTION
+#ifdef STORE_EXCEPTION_HANDLER
 .Lstore_table:
        l32i    a3, a2, PT_AREG0;       _j .Lstore_w;   .align 8
        mov     a3, a1;                 _j .Lstore_w;   .align 8        # fishy??
@@ -348,7 +343,6 @@ ENTRY(fast_unaligned)
        mov     a3, a15         ;       _j .Lstore_w;   .align 8
 #endif
 
-#ifdef ANY_EXCEPTION_HANDLER
        /* We cannot handle this exception. */
 
        .extern _kernel_exception
@@ -377,8 +371,8 @@ ENTRY(fast_unaligned)
 
 2:     movi    a0, _user_exception
        jx      a0
-#endif
-#if XCHAL_UNALIGNED_STORE_EXCEPTION
+
+#ifdef STORE_EXCEPTION_HANDLER
 
        # a7: instruction pointer, a4: instruction, a3: value
 .Lstore_w:
@@ -444,7 +438,7 @@ ENTRY(fast_unaligned)
        s32i    a6, a4, 4
 #endif
 #endif
-#ifdef ANY_EXCEPTION_HANDLER
+
 .Lexit:
 #if XCHAL_HAVE_LOOPS
        rsr     a4, lend                # check if we reached LEND
@@ -539,7 +533,7 @@ ENTRY(fast_unaligned)
        __src_b a4, a4, a5      # a4 has the instruction
 
        ret
-#endif
+
 ENDPROC(fast_unaligned)
 
 ENTRY(fast_unaligned_fixup)
index 2b69c3c..fc1a4f3 100644 (file)
 449    common  futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
 451    common  cachestat                       sys_cachestat
+452    common  fchmodat2                       sys_fchmodat2
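
A user-space invocation sketch for the newly wired syscall (this assumes a libc without a wrapper yet; 452 matches the number added across architectures in this cycle):

#include <fcntl.h>		/* AT_FDCWD, AT_SYMLINK_NOFOLLOW */
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_fchmodat2
#define __NR_fchmodat2 452
#endif

int main(void)
{
	/* Like fchmodat(), but the flags argument is actually honored. */
	long ret = syscall(__NR_fchmodat2, AT_FDCWD, "example.txt", 0640,
			   AT_SYMLINK_NOFOLLOW);
	return ret == -1;
}
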
index 17eb180..427c125 100644 (file)
@@ -102,7 +102,8 @@ static dispatch_init_table_t __initdata dispatch_init_table[] = {
 #endif
 { EXCCAUSE_INTEGER_DIVIDE_BY_ZERO, 0,     do_div0 },
 /* EXCCAUSE_PRIVILEGED unhandled */
-#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION
+#if XCHAL_UNALIGNED_LOAD_EXCEPTION || XCHAL_UNALIGNED_STORE_EXCEPTION || \
+               IS_ENABLED(CONFIG_XTENSA_LOAD_STORE)
 #ifdef CONFIG_XTENSA_UNALIGNED_USER
 { EXCCAUSE_UNALIGNED,          USER,      fast_unaligned },
 #endif
index 9ac46ab..85c82cd 100644 (file)
@@ -237,7 +237,7 @@ static int tuntap_probe(struct iss_net_private *lp, int index, char *init)
 
        init += sizeof(TRANSPORT_TUNTAP_NAME) - 1;
        if (*init == ',') {
-               rem = split_if_spec(init + 1, &mac_str, &dev_name);
+               rem = split_if_spec(init + 1, &mac_str, &dev_name, NULL);
                if (rem != NULL) {
                        pr_err("%s: extra garbage on specification : '%s'\n",
                               dev->name, rem);
@@ -540,6 +540,7 @@ static void iss_net_configure(int index, char *init)
                rtnl_unlock();
                pr_err("%s: error registering net device!\n", dev->name);
                platform_device_unregister(&lp->pdev);
+               /* dev is freed by the iss_net_pdev_release callback */
                return;
        }
        rtnl_unlock();
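
The split_if_spec() fix above adds a missing NULL sentinel: the helper fills one out-pointer per comma-separated field and walks its va_list until it hits NULL, so without the terminator va_arg() reads past the real arguments into garbage. A simplified sketch of the calling convention (the real helper differs in detail):

#include <linux/stdarg.h>
#include <linux/string.h>

static char *split_if_spec(char *str, ...)
{
	char **arg;
	va_list ap;

	va_start(ap, str);
	while ((arg = va_arg(ap, char **)) != NULL) {
		if (str == NULL || *str == '\0') {
			*arg = NULL;		/* field absent */
			continue;
		}
		*arg = str;
		str = strchr(str, ',');
		if (str != NULL)
			*str++ = '\0';		/* terminate this field */
	}
	va_end(ap);
	return str;	/* non-NULL means unconsumed trailing text */
}

With init + 1 = "00:11:22:33:44:55,tap0", the two out-pointers receive the MAC string and "tap0" and the function returns NULL; a third field would be returned as the "extra garbage" the caller warns about.
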
index 979e28a..f3b13aa 100644 (file)
@@ -206,23 +206,6 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
 }
 EXPORT_SYMBOL(sync_blockdev_range);
 
-/*
- * Write out and wait upon all dirty data associated with this
- * device.   Filesystem data as well as the underlying block
- * device.  Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
-       struct super_block *sb = get_super(bdev);
-       if (sb) {
-               int res = sync_filesystem(sb);
-               drop_super(sb);
-               return res;
-       }
-       return sync_blockdev(bdev);
-}
-EXPORT_SYMBOL(fsync_bdev);
-
 /**
  * freeze_bdev - lock a filesystem and force it into a consistent state
  * @bdev:      blockdevice to lock
@@ -248,9 +231,9 @@ int freeze_bdev(struct block_device *bdev)
        if (!sb)
                goto sync;
        if (sb->s_op->freeze_super)
-               error = sb->s_op->freeze_super(sb);
+               error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
        else
-               error = freeze_super(sb);
+               error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
        deactivate_super(sb);
 
        if (error) {
@@ -291,9 +274,9 @@ int thaw_bdev(struct block_device *bdev)
                goto out;
 
        if (sb->s_op->thaw_super)
-               error = sb->s_op->thaw_super(sb);
+               error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
        else
-               error = thaw_super(sb);
+               error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
        if (error)
                bdev->bd_fsfreeze_count++;
        else
@@ -960,26 +943,38 @@ out_path_put:
 }
 EXPORT_SYMBOL(lookup_bdev);
 
-int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+/**
+ * bdev_mark_dead - mark a block device as dead
+ * @bdev: block device to operate on
+ * @surprise: indicate a surprise removal
+ *
+ * Tell the file system that this device or media is dead.  If @surprise is set
+ * to %true the device or media is already gone, if not we are preparing for an
+ * orderly removal.
+ *
+ * This calls into the file system, which then typically syncs out all dirty data
+ * and writes back inodes and then invalidates any cached data in the inodes on
+ * the file system.  In addition we also invalidate the block device mapping.
+ */
+void bdev_mark_dead(struct block_device *bdev, bool surprise)
 {
-       struct super_block *sb = get_super(bdev);
-       int res = 0;
+       mutex_lock(&bdev->bd_holder_lock);
+       if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
+               bdev->bd_holder_ops->mark_dead(bdev, surprise);
+       else
+               sync_blockdev(bdev);
+       mutex_unlock(&bdev->bd_holder_lock);
 
-       if (sb) {
-               /*
-                * no need to lock the super, get_super holds the
-                * read mutex so the filesystem cannot go away
-                * under us (->put_super runs with the write lock
-                * hold).
-                */
-               shrink_dcache_sb(sb);
-               res = invalidate_inodes(sb, kill_dirty);
-               drop_super(sb);
-       }
        invalidate_bdev(bdev);
-       return res;
 }
-EXPORT_SYMBOL(__invalidate_device);
+#ifdef CONFIG_DASD_MODULE
+/*
+ * Drivers should not use this directly, but the DASD driver has historically
+ * had a shutdown-to-offline mode that doesn't actually remove the gendisk
+ * but otherwise looks a lot like a safe device removal.
+ */
+EXPORT_SYMBOL_GPL(bdev_mark_dead);
+#endif
 
 void sync_bdevs(bool wait)
 {
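
bdev_mark_dead() above prefers the holder's mark_dead callback over a bare sync, giving filesystems a real notification path that the removed __invalidate_device()/fsync_bdev() pair never offered. A hypothetical holder wiring, as a sketch (all example_* names are invented here):

static void example_mark_dead(struct block_device *bdev, bool surprise)
{
	struct example_dev *ed = bdev->bd_holder;	/* set at open time */

	if (!surprise)
		sync_blockdev(bdev);	/* device still present: flush */
	example_stop_io(ed);		/* quiesce, stop new submissions */
}

static const struct blk_holder_ops example_holder_ops = {
	.mark_dead	= example_mark_dead,
};
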
index fc49be6..9faafcd 100644 (file)
@@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work_struct *work)
                        blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
        if (blkg->parent)
                blkg_put(blkg->parent);
+       spin_lock_irq(&q->queue_lock);
        list_del_init(&blkg->q_node);
+       spin_unlock_irq(&q->queue_lock);
        mutex_unlock(&q->blkcg_mutex);
 
        blk_put_queue(q);
index 99d8b98..9866468 100644 (file)
@@ -722,14 +722,9 @@ void submit_bio_noacct(struct bio *bio)
        struct block_device *bdev = bio->bi_bdev;
        struct request_queue *q = bdev_get_queue(bdev);
        blk_status_t status = BLK_STS_IOERR;
-       struct blk_plug *plug;
 
        might_sleep();
 
-       plug = blk_mq_plug(bio);
-       if (plug && plug->nowait)
-               bio->bi_opf |= REQ_NOWAIT;
-
        /*
         * For a REQ_NOWAIT based request, return -EOPNOTSUPP
         * if queue does not support NOWAIT.
@@ -1059,7 +1054,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
        plug->rq_count = 0;
        plug->multiple_queues = false;
        plug->has_elevator = false;
-       plug->nowait = false;
        INIT_LIST_HEAD(&plug->cb_list);
 
        /*
@@ -1144,8 +1138,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
 {
        if (!list_empty(&plug->cb_list))
                flush_plug_callbacks(plug, from_schedule);
-       if (!rq_list_empty(plug->mq_list))
-               blk_mq_flush_plug_list(plug, from_schedule);
+       blk_mq_flush_plug_list(plug, from_schedule);
        /*
         * Unconditionally flush out cached requests, even if the unplug
         * event came from schedule. Since we know hold references to the
index ad9844c..e6468ea 100644 (file)
@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot {
        struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
 } *blk_crypto_keyslots;
 
-static struct blk_crypto_profile blk_crypto_fallback_profile;
+static struct blk_crypto_profile *blk_crypto_fallback_profile;
 static struct workqueue_struct *blk_crypto_wq;
 static mempool_t *blk_crypto_bounce_page_pool;
 static struct bio_set crypto_bio_split;
@@ -292,7 +292,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
         * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
         * this bio's algorithm and key.
         */
-       blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+       blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
                                        bc->bc_key, &slot);
        if (blk_st != BLK_STS_OK) {
                src_bio->bi_status = blk_st;
@@ -395,7 +395,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
         * Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
         * this bio's algorithm and key.
         */
-       blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+       blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
                                        bc->bc_key, &slot);
        if (blk_st != BLK_STS_OK) {
                bio->bi_status = blk_st;
@@ -499,7 +499,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
                return false;
        }
 
-       if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+       if (!__blk_crypto_cfg_supported(blk_crypto_fallback_profile,
                                        &bc->bc_key->crypto_cfg)) {
                bio->bi_status = BLK_STS_NOTSUPP;
                return false;
@@ -526,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
 
 int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
 {
-       return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
+       return __blk_crypto_evict_key(blk_crypto_fallback_profile, key);
 }
 
 static bool blk_crypto_fallback_inited;
@@ -534,7 +534,6 @@ static int blk_crypto_fallback_init(void)
 {
        int i;
        int err;
-       struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
 
        if (blk_crypto_fallback_inited)
                return 0;
@@ -545,18 +544,27 @@ static int blk_crypto_fallback_init(void)
        if (err)
                goto out;
 
-       err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
-       if (err)
+       /* Dynamic allocation is needed because of lockdep_register_key(). */
+       blk_crypto_fallback_profile =
+               kzalloc(sizeof(*blk_crypto_fallback_profile), GFP_KERNEL);
+       if (!blk_crypto_fallback_profile) {
+               err = -ENOMEM;
                goto fail_free_bioset;
+       }
+
+       err = blk_crypto_profile_init(blk_crypto_fallback_profile,
+                                     blk_crypto_num_keyslots);
+       if (err)
+               goto fail_free_profile;
        err = -ENOMEM;
 
-       profile->ll_ops = blk_crypto_fallback_ll_ops;
-       profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+       blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops;
+       blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
 
        /* All blk-crypto modes have a crypto API fallback. */
        for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
-               profile->modes_supported[i] = 0xFFFFFFFF;
-       profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+               blk_crypto_fallback_profile->modes_supported[i] = 0xFFFFFFFF;
+       blk_crypto_fallback_profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
 
        blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
                                        WQ_UNBOUND | WQ_HIGHPRI |
@@ -597,7 +605,9 @@ fail_free_keyslots:
 fail_free_wq:
        destroy_workqueue(blk_crypto_wq);
 fail_destroy_profile:
-       blk_crypto_profile_destroy(profile);
+       blk_crypto_profile_destroy(blk_crypto_fallback_profile);
+fail_free_profile:
+       kfree(blk_crypto_fallback_profile);
 fail_free_bioset:
        bioset_exit(&crypto_bio_split);
 out:
index 2a67d3f..7fabc88 100644 (file)
@@ -79,7 +79,14 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile,
        unsigned int slot_hashtable_size;
 
        memset(profile, 0, sizeof(*profile));
-       init_rwsem(&profile->lock);
+
+       /*
+        * profile->lock of an underlying device can nest inside profile->lock
+        * of a device-mapper device, so use a dynamic lock class to avoid
+        * false-positive lockdep reports.
+        */
+       lockdep_register_key(&profile->lockdep_key);
+       __init_rwsem(&profile->lock, "&profile->lock", &profile->lockdep_key);
 
        if (num_slots == 0)
                return 0;
@@ -89,7 +96,7 @@ int blk_crypto_profile_init(struct blk_crypto_profile *profile,
        profile->slots = kvcalloc(num_slots, sizeof(profile->slots[0]),
                                  GFP_KERNEL);
        if (!profile->slots)
-               return -ENOMEM;
+               goto err_destroy;
 
        profile->num_slots = num_slots;
 
@@ -435,6 +442,7 @@ void blk_crypto_profile_destroy(struct blk_crypto_profile *profile)
 {
        if (!profile)
                return;
+       lockdep_unregister_key(&profile->lockdep_key);
        kvfree(profile->slot_hashtable);
        kvfree_sensitive(profile->slots,
                         sizeof(profile->slots[0]) * profile->num_slots);
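
Registering a per-profile lockdep key gives each blk_crypto_profile its own lock class, so a stacked configuration (a device-mapper profile lock taken while an underlying disk's profile lock is held) is no longer misreported as recursive locking. The pattern in miniature (a sketch; example_profile is invented here):

struct example_profile {
	struct rw_semaphore lock;
	struct lock_class_key lockdep_key;	/* one class per instance */
};

static void example_profile_init(struct example_profile *p)
{
	lockdep_register_key(&p->lockdep_key);
	__init_rwsem(&p->lock, "&p->lock", &p->lockdep_key);
}

static void example_profile_destroy(struct example_profile *p)
{
	lockdep_unregister_key(&p->lockdep_key);	/* after last use */
}
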
index dba392c..8220517 100644 (file)
@@ -189,7 +189,7 @@ static void blk_flush_complete_seq(struct request *rq,
        case REQ_FSEQ_DATA:
                list_move_tail(&rq->flush.list, &fq->flush_data_in_flight);
                spin_lock(&q->requeue_lock);
-               list_add_tail(&rq->queuelist, &q->flush_list);
+               list_add(&rq->queuelist, &q->requeue_list);
                spin_unlock(&q->requeue_lock);
                blk_mq_kick_requeue_list(q);
                break;
index 9dfcf54..089fcb9 100644 (file)
@@ -2516,6 +2516,10 @@ static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
        u64 seek_pages = 0;
        u64 cost = 0;
 
+       /* Can't calculate cost for empty bio */
+       if (!bio->bi_iter.bi_size)
+               goto out;
+
        switch (bio_op(bio)) {
        case REQ_OP_READ:
                coef_seqio      = ioc->params.lcoefs[LCOEF_RSEQIO];
@@ -3297,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
        if (qos[QOS_MIN] > qos[QOS_MAX])
                goto einval;
 
-       if (enable) {
+       if (enable && !ioc->enabled) {
                blk_stat_enable_accounting(disk->queue);
                blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                ioc->enabled = true;
-       } else {
+       } else if (!enable && ioc->enabled) {
+               blk_stat_disable_accounting(disk->queue);
                blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
                ioc->enabled = false;
        }
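
blk_stat_enable_accounting() is reference-counted on the queue, so repeatedly writing "enable" to the qos file used to take a reference each time while the disable path never dropped one (and never called blk_stat_disable_accounting() at all). The fix acts only on genuine off-to-on and on-to-off transitions. A userspace toy model of that invariant, all names hypothetical:

#include <stdbool.h>
#include <stdio.h>

static int accounting_refcount;
static bool enabled;

static void set_enabled(bool enable)
{
	if (enable && !enabled) {		/* off -> on */
		accounting_refcount++;
		enabled = true;
	} else if (!enable && enabled) {	/* on -> off */
		accounting_refcount--;
		enabled = false;
	}
	/* writes that repeat the current state are now no-ops */
}

int main(void)
{
	set_enabled(true);
	set_enabled(true);	/* previously leaked a reference */
	set_enabled(false);
	printf("refcount=%d\n", accounting_refcount);	/* 0 */
	return 0;
}
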
index 5504719..953f083 100644 (file)
@@ -328,8 +328,24 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL(blk_rq_init);
 
+/* Set start and alloc time when the allocated request is actually used */
+static inline void blk_mq_rq_time_init(struct request *rq, u64 alloc_time_ns)
+{
+       if (blk_mq_need_time_stamp(rq))
+               rq->start_time_ns = ktime_get_ns();
+       else
+               rq->start_time_ns = 0;
+
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+       if (blk_queue_rq_alloc_time(rq->q))
+               rq->alloc_time_ns = alloc_time_ns ?: rq->start_time_ns;
+       else
+               rq->alloc_time_ns = 0;
+#endif
+}
+
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
-               struct blk_mq_tags *tags, unsigned int tag, u64 alloc_time_ns)
+               struct blk_mq_tags *tags, unsigned int tag)
 {
        struct blk_mq_ctx *ctx = data->ctx;
        struct blk_mq_hw_ctx *hctx = data->hctx;
@@ -356,14 +372,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
        }
        rq->timeout = 0;
 
-       if (blk_mq_need_time_stamp(rq))
-               rq->start_time_ns = ktime_get_ns();
-       else
-               rq->start_time_ns = 0;
        rq->part = NULL;
-#ifdef CONFIG_BLK_RQ_ALLOC_TIME
-       rq->alloc_time_ns = alloc_time_ns;
-#endif
        rq->io_start_time_ns = 0;
        rq->stats_sectors = 0;
        rq->nr_phys_segments = 0;
@@ -393,8 +402,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 }
 
 static inline struct request *
-__blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
-               u64 alloc_time_ns)
+__blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
 {
        unsigned int tag, tag_offset;
        struct blk_mq_tags *tags;
@@ -413,7 +421,7 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
                tag = tag_offset + i;
                prefetch(tags->static_rqs[tag]);
                tag_mask &= ~(1UL << i);
-               rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns);
+               rq = blk_mq_rq_ctx_init(data, tags, tag);
                rq_list_add(data->cached_rq, rq);
                nr++;
        }
@@ -474,9 +482,11 @@ retry:
         * Try batched alloc if we want more than 1 tag.
         */
        if (data->nr_tags > 1) {
-               rq = __blk_mq_alloc_requests_batch(data, alloc_time_ns);
-               if (rq)
+               rq = __blk_mq_alloc_requests_batch(data);
+               if (rq) {
+                       blk_mq_rq_time_init(rq, alloc_time_ns);
                        return rq;
+               }
                data->nr_tags = 1;
        }
 
@@ -499,8 +509,9 @@ retry:
                goto retry;
        }
 
-       return blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag,
-                                       alloc_time_ns);
+       rq = blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag);
+       blk_mq_rq_time_init(rq, alloc_time_ns);
+       return rq;
 }
 
 static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
@@ -555,6 +566,7 @@ static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
                        return NULL;
 
                plug->cached_rq = rq_list_next(rq);
+               blk_mq_rq_time_init(rq, 0);
        }
 
        rq->cmd_flags = opf;
@@ -656,8 +668,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
        tag = blk_mq_get_tag(&data);
        if (tag == BLK_MQ_NO_TAG)
                goto out_queue_exit;
-       rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag,
-                                       alloc_time_ns);
+       rq = blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag);
+       blk_mq_rq_time_init(rq, alloc_time_ns);
        rq->__data_len = 0;
        rq->__sector = (sector_t) -1;
        rq->bio = rq->biotail = NULL;
@@ -669,6 +681,21 @@ out_queue_exit:
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
 
+static void blk_mq_finish_request(struct request *rq)
+{
+       struct request_queue *q = rq->q;
+
+       if (rq->rq_flags & RQF_USE_SCHED) {
+               q->elevator->type->ops.finish_request(rq);
+               /*
+                * For a postflush request that may need to be
+                * completed twice, we should clear this flag
+                * to avoid double finish_request() on the rq.
+                */
+               rq->rq_flags &= ~RQF_USE_SCHED;
+       }
+}
+
 static void __blk_mq_free_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
@@ -695,9 +722,7 @@ void blk_mq_free_request(struct request *rq)
 {
        struct request_queue *q = rq->q;
 
-       if ((rq->rq_flags & RQF_USE_SCHED) &&
-           q->elevator->type->ops.finish_request)
-               q->elevator->type->ops.finish_request(rq);
+       blk_mq_finish_request(rq);
 
        if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
                laptop_io_completion(q->disk->bdi);
@@ -1008,6 +1033,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
        if (blk_mq_need_time_stamp(rq))
                __blk_mq_end_request_acct(rq, ktime_get_ns());
 
+       blk_mq_finish_request(rq);
+
        if (rq->end_io) {
                rq_qos_done(rq->q, rq);
                if (rq->end_io(rq, error) == RQ_END_IO_FREE)
@@ -1062,6 +1089,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
                if (iob->need_ts)
                        __blk_mq_end_request_acct(rq, now);
 
+               blk_mq_finish_request(rq);
+
                rq_qos_done(rq->q, rq);
 
                /*
@@ -2742,7 +2771,14 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 {
        struct request *rq;
 
-       if (rq_list_empty(plug->mq_list))
+       /*
+        * We may have been called recursively midway through handling
+        * plug->mq_list via a schedule() in the driver's queue_rq() callback.
+        * To avoid mq_list changing under our feet, clear rq_count early and
+        * bail out specifically if rq_count is 0 rather than checking
+        * whether the mq_list is empty.
+        */
+       if (plug->rq_count == 0)
                return;
        plug->rq_count = 0;
 
@@ -2896,6 +2932,7 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
        plug->cached_rq = rq_list_next(rq);
        rq_qos_throttle(q, *bio);
 
+       blk_mq_rq_time_init(rq, 0);
        rq->cmd_flags = (*bio)->bi_opf;
        INIT_LIST_HEAD(&rq->queuelist);
        return rq;
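
The new blk_mq_rq_time_init() defers timestamping until a request is actually handed out, so requests parked in the plug cache (re-stamped above with alloc_time_ns == 0) are not billed for their idle time; the "alloc_time_ns ?: rq->start_time_ns" fallback means "no earlier allocation time recorded, fall back to the start time". A standalone sketch of that lazy-stamp pattern, with hypothetical types:

#include <stdint.h>
#include <time.h>

struct req {
	uint64_t start_time_ns;
	uint64_t alloc_time_ns;
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

/* Called when the request is first used, not when it was cached. */
static void req_time_init(struct req *rq, uint64_t alloc_time_ns)
{
	rq->start_time_ns = now_ns();
	rq->alloc_time_ns = alloc_time_ns ? alloc_time_ns : rq->start_time_ns;
}
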
index 0f9f97c..619ee41 100644 (file)
@@ -442,7 +442,6 @@ struct blk_revalidate_zone_args {
        unsigned long   *conv_zones_bitmap;
        unsigned long   *seq_zones_wlock;
        unsigned int    nr_zones;
-       sector_t        zone_sectors;
        sector_t        sector;
 };
 
@@ -456,38 +455,34 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
        struct gendisk *disk = args->disk;
        struct request_queue *q = disk->queue;
        sector_t capacity = get_capacity(disk);
+       sector_t zone_sectors = q->limits.chunk_sectors;
+
+       /* Check for bad zones and holes in the zone report */
+       if (zone->start != args->sector) {
+               pr_warn("%s: Zone gap at sectors %llu..%llu\n",
+                       disk->disk_name, args->sector, zone->start);
+               return -ENODEV;
+       }
+
+       if (zone->start >= capacity || !zone->len) {
+               pr_warn("%s: Invalid zone start %llu, length %llu\n",
+                       disk->disk_name, zone->start, zone->len);
+               return -ENODEV;
+       }
 
        /*
         * All zones must have the same size, with the exception of a possibly
         * smaller last zone.
         */
-       if (zone->start == 0) {
-               if (zone->len == 0 || !is_power_of_2(zone->len)) {
-                       pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
-                               disk->disk_name, zone->len);
-                       return -ENODEV;
-               }
-
-               args->zone_sectors = zone->len;
-               args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
-       } else if (zone->start + args->zone_sectors < capacity) {
-               if (zone->len != args->zone_sectors) {
+       if (zone->start + zone->len < capacity) {
+               if (zone->len != zone_sectors) {
                        pr_warn("%s: Invalid zoned device with non constant zone size\n",
                                disk->disk_name);
                        return -ENODEV;
                }
-       } else {
-               if (zone->len > args->zone_sectors) {
-                       pr_warn("%s: Invalid zoned device with larger last zone size\n",
-                               disk->disk_name);
-                       return -ENODEV;
-               }
-       }
-
-       /* Check for holes in the zone report */
-       if (zone->start != args->sector) {
-               pr_warn("%s: Zone gap at sectors %llu..%llu\n",
-                       disk->disk_name, args->sector, zone->start);
+       } else if (zone->len > zone_sectors) {
+               pr_warn("%s: Invalid zoned device with larger last zone size\n",
+                       disk->disk_name);
                return -ENODEV;
        }
 
@@ -526,11 +521,13 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
  * @disk:      Target disk
  * @update_driver_data:        Callback to update driver data on the frozen disk
  *
- * Helper function for low-level device drivers to (re) allocate and initialize
- * a disk request queue zone bitmaps. This functions should normally be called
- * within the disk ->revalidate method for blk-mq based drivers.  For BIO based
- * drivers only q->nr_zones needs to be updated so that the sysfs exposed value
- * is correct.
+ * Helper function for low-level device drivers to check, (re)allocate and
+ * initialize a disk request queue's zone bitmaps. This function should normally
+ * be called within the disk ->revalidate method for blk-mq based drivers.
+ * Before calling this function, the device driver must already have set the
+ * device zone size (chunk_sectors limit) and the max zone append limit.
+ * For BIO based drivers, this function cannot be used. BIO based device drivers
+ * only need to set disk->nr_zones so that the sysfs exposed value is correct.
  * If the @update_driver_data callback function is not NULL, the callback is
  * executed with the device request queue frozen after all zones have been
  * checked.
@@ -539,9 +536,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
                              void (*update_driver_data)(struct gendisk *disk))
 {
        struct request_queue *q = disk->queue;
-       struct blk_revalidate_zone_args args = {
-               .disk           = disk,
-       };
+       sector_t zone_sectors = q->limits.chunk_sectors;
+       sector_t capacity = get_capacity(disk);
+       struct blk_revalidate_zone_args args = { };
        unsigned int noio_flag;
        int ret;
 
@@ -550,13 +547,31 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
        if (WARN_ON_ONCE(!queue_is_mq(q)))
                return -EIO;
 
-       if (!get_capacity(disk))
-               return -EIO;
+       if (!capacity)
+               return -ENODEV;
+
+       /*
+        * Checks that the device driver indicated a valid zone size and that
+        * the max zone append limit is set.
+        */
+       if (!zone_sectors || !is_power_of_2(zone_sectors)) {
+               pr_warn("%s: Invalid non power of two zone size (%llu)\n",
+                       disk->disk_name, zone_sectors);
+               return -ENODEV;
+       }
+
+       if (!q->limits.max_zone_append_sectors) {
+               pr_warn("%s: Invalid 0 maximum zone append limit\n",
+                       disk->disk_name);
+               return -ENODEV;
+       }
 
        /*
         * Ensure that all memory allocations in this context are done as if
         * GFP_NOIO was specified.
         */
+       args.disk = disk;
+       args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
        noio_flag = memalloc_noio_save();
        ret = disk->fops->report_zones(disk, 0, UINT_MAX,
                                       blk_revalidate_zone_cb, &args);
@@ -570,7 +585,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
         * If zones were reported, make sure that the entire disk capacity
         * has been checked.
         */
-       if (ret > 0 && args.sector != get_capacity(disk)) {
+       if (ret > 0 && args.sector != capacity) {
                pr_warn("%s: Missing zones from sector %llu\n",
                        disk->disk_name, args.sector);
                ret = -ENODEV;
@@ -583,7 +598,6 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
         */
        blk_mq_freeze_queue(q);
        if (ret > 0) {
-               blk_queue_chunk_sectors(q, args.zone_sectors);
                disk->nr_zones = args.nr_zones;
                swap(disk->seq_zones_wlock, args.seq_zones_wlock);
                swap(disk->conv_zones_bitmap, args.conv_zones_bitmap);
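
Because the zone size is now validated as a power of two before the report callback runs, blk_revalidate_disk_zones() can precompute the zone count with a shift; the rounding up covers a possibly smaller last zone. A plain-C worked example of that arithmetic, with invented values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t capacity = 1000000;	/* total sectors (example) */
	uint64_t zone_sectors = 524288;	/* 256 MiB zones at 512 B sectors */

	if (!zone_sectors || (zone_sectors & (zone_sectors - 1))) {
		fprintf(stderr, "zone size must be a power of two\n");
		return 1;
	}
	/* same as (capacity + zone_sectors - 1) >> ilog2(zone_sectors) */
	uint64_t nr_zones = (capacity + zone_sectors - 1) / zone_sectors;

	printf("nr_zones = %llu\n", (unsigned long long)nr_zones);	/* 2 */
	return 0;
}
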
index 0cfac46..422db82 100644 (file)
@@ -281,9 +281,7 @@ bool disk_check_media_change(struct gendisk *disk)
        if (!(events & DISK_EVENT_MEDIA_CHANGE))
                return false;
 
-       if (__invalidate_device(disk->part0, true))
-               pr_warn("VFS: busy inodes on changed media %s\n",
-                       disk->disk_name);
+       bdev_mark_dead(disk->part0, true);
        set_bit(GD_NEED_PART_SCAN, &disk->state);
        return true;
 }
@@ -294,25 +292,16 @@ EXPORT_SYMBOL(disk_check_media_change);
  * @disk: the disk which will raise the event
  * @events: the events to raise
  *
- * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
- * attempt to free all dentries and inodes and invalidates all block
+ * Should be called when the media changes for @disk.  Generates a uevent
+ * and attempts to free all dentries and inodes and invalidates all block
  * device page cache entries in that case.
- *
- * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
  */
-bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+void disk_force_media_change(struct gendisk *disk)
 {
-       disk_event_uevent(disk, events);
-
-       if (!(events & DISK_EVENT_MEDIA_CHANGE))
-               return false;
-
+       disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
        inc_diskseq(disk);
-       if (__invalidate_device(disk->part0, true))
-               pr_warn("VFS: busy inodes on changed media %s\n",
-                       disk->disk_name);
+       bdev_mark_dead(disk->part0, true);
        set_bit(GD_NEED_PART_SCAN, &disk->state);
-       return true;
 }
 EXPORT_SYMBOL_GPL(disk_force_media_change);
 
index 8400e30..5ff093c 100644 (file)
@@ -499,6 +499,9 @@ void elv_unregister_queue(struct request_queue *q)
 
 int elv_register(struct elevator_type *e)
 {
+       /* finish request is mandatory */
+       if (WARN_ON_ONCE(!e->ops.finish_request))
+               return -EINVAL;
        /* insert_requests and dispatch_request are mandatory */
        if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
                return -EINVAL;
index a286bf3..838ffad 100644 (file)
@@ -358,13 +358,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
                task_io_account_write(bio->bi_iter.bi_size);
        }
 
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               bio->bi_opf |= REQ_NOWAIT;
+
        if (iocb->ki_flags & IOCB_HIPRI) {
-               bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+               bio->bi_opf |= REQ_POLLED;
                submit_bio(bio);
                WRITE_ONCE(iocb->private, bio);
        } else {
-               if (iocb->ki_flags & IOCB_NOWAIT)
-                       bio->bi_opf |= REQ_NOWAIT;
                submit_bio(bio);
        }
        return -EIOCBQUEUED;
index 3d287b3..cc32a0c 100644 (file)
@@ -554,7 +554,7 @@ out_exit_elevator:
 }
 EXPORT_SYMBOL(device_add_disk);
 
-static void blk_report_disk_dead(struct gendisk *disk)
+static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
 {
        struct block_device *bdev;
        unsigned long idx;
@@ -565,10 +565,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
                        continue;
                rcu_read_unlock();
 
-               mutex_lock(&bdev->bd_holder_lock);
-               if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
-                       bdev->bd_holder_ops->mark_dead(bdev);
-               mutex_unlock(&bdev->bd_holder_lock);
+               bdev_mark_dead(bdev, surprise);
 
                put_device(&bdev->bd_device);
                rcu_read_lock();
@@ -576,14 +573,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
        rcu_read_unlock();
 }
 
-/**
- * blk_mark_disk_dead - mark a disk as dead
- * @disk: disk to mark as dead
- *
- * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
- * to this disk.
- */
-void blk_mark_disk_dead(struct gendisk *disk)
+static void __blk_mark_disk_dead(struct gendisk *disk)
 {
        /*
         * Fail any new I/O.
@@ -603,8 +593,19 @@ void blk_mark_disk_dead(struct gendisk *disk)
         * Prevent new I/O from crossing bio_queue_enter().
         */
        blk_queue_start_drain(disk->queue);
+}
 
-       blk_report_disk_dead(disk);
+/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark a disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+       __blk_mark_disk_dead(disk);
+       blk_report_disk_dead(disk, true);
 }
 EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
 
@@ -641,18 +642,20 @@ void del_gendisk(struct gendisk *disk)
        disk_del_events(disk);
 
        /*
-        * Prevent new openers by unlinked the bdev inode, and write out
-        * dirty data before marking the disk dead and stopping all I/O.
+        * Prevent new openers by unlinking the bdev inode.
         */
        mutex_lock(&disk->open_mutex);
-       xa_for_each(&disk->part_tbl, idx, part) {
+       xa_for_each(&disk->part_tbl, idx, part)
                remove_inode_hash(part->bd_inode);
-               fsync_bdev(part);
-               __invalidate_device(part, true);
-       }
        mutex_unlock(&disk->open_mutex);
 
-       blk_mark_disk_dead(disk);
+       /*
+        * Tell the file system to write back all dirty data and shut down if
+        * it hasn't been notified earlier.
+        */
+       if (!test_bit(GD_DEAD, &disk->state))
+               blk_report_disk_dead(disk, false);
+       __blk_mark_disk_dead(disk);
 
        /*
         * Drop all partitions now that the disk is marked dead.
index 3be1194..648670d 100644 (file)
@@ -364,7 +364,14 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
 {
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
-       fsync_bdev(bdev);
+
+       mutex_lock(&bdev->bd_holder_lock);
+       if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
+               bdev->bd_holder_ops->sync(bdev);
+       else
+               sync_blockdev(bdev);
+       mutex_unlock(&bdev->bd_holder_lock);
+
        invalidate_bdev(bdev);
        return 0;
 }
index 6aa5daf..02a916b 100644 (file)
@@ -176,7 +176,7 @@ static inline struct request *deadline_from_pos(struct dd_per_prio *per_prio,
         * zoned writes, start searching from the start of a zone.
         */
        if (blk_rq_is_seq_zoned_write(rq))
-               pos -= round_down(pos, rq->q->limits.chunk_sectors);
+               pos = round_down(pos, rq->q->limits.chunk_sectors);
 
        while (node) {
                rq = rb_entry_rq(node);
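
This is a one-character fix: the search should begin at the zone start, which is round_down(pos, chunk_sectors); the old code subtracted that value from pos and so produced the offset within the zone instead. A quick worked example (round_down defined as the kernel does, for power-of-two alignment):

#include <stdio.h>

#define round_down(x, y) ((x) & ~((y) - 1))	/* y must be a power of two */

int main(void)
{
	unsigned long pos = 1000, chunk = 256;

	printf("zone start (new): %lu\n", round_down(pos, chunk));	  /* 768 */
	printf("old result:       %lu\n", pos - round_down(pos, chunk)); /* 232 */
	return 0;
}
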
index ed222b9..5069210 100644 (file)
@@ -90,7 +90,7 @@ int amiga_partition(struct parsed_partitions *state)
        }
        blk = be32_to_cpu(rdb->rdb_PartitionList);
        put_dev_sector(sect);
-       for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) {
+       for (part = 1; (s32) blk>0 && part<=16; part++, put_dev_sector(sect)) {
                /* Read in terms partition table understands */
                if (check_mul_overflow(blk, (sector_t) blksize, &blk)) {
                        pr_err("Dev %s: overflow calculating partition block %llu! Skipping partitions %u and beyond\n",
index 13a7341..e137a87 100644 (file)
@@ -281,10 +281,7 @@ static void delete_partition(struct block_device *part)
         * looked up any more even when openers still hold references.
         */
        remove_inode_hash(part->bd_inode);
-
-       fsync_bdev(part);
-       __invalidate_device(part, true);
-
+       bdev_mark_dead(part, false);
        drop_partition(part);
 }
 
index 6218c77..10efb56 100644 (file)
@@ -992,7 +992,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
                ssize_t plen;
 
                /* use the existing memory in an allocated page */
-               if (ctx->merge) {
+               if (ctx->merge && !(msg->msg_flags & MSG_SPLICE_PAGES)) {
                        sgl = list_entry(ctx->tsgl_list.prev,
                                         struct af_alg_tsgl, list);
                        sg = sgl->sg + sgl->cur - 1;
@@ -1054,6 +1054,7 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
                        ctx->used += plen;
                        copied += plen;
                        size -= plen;
+                       ctx->merge = 0;
                } else {
                        do {
                                struct page *pg;
@@ -1085,12 +1086,12 @@ int af_alg_sendmsg(struct socket *sock, struct msghdr *msg, size_t size,
                                size -= plen;
                                sgl->cur++;
                        } while (len && sgl->cur < MAX_SGL_ENTS);
+
+                       ctx->merge = plen & (PAGE_SIZE - 1);
                }
 
                if (!size)
                        sg_mark_end(sg + sgl->cur - 1);
-
-               ctx->merge = plen & (PAGE_SIZE - 1);
        }
 
        err = 0;
@@ -1240,6 +1241,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                                return -ENOMEM;
                }
 
+               rsgl->sgl.need_unpin =
+                       iov_iter_extract_will_pin(&msg->msg_iter);
                rsgl->sgl.sgt.sgl = rsgl->sgl.sgl;
                rsgl->sgl.sgt.nents = 0;
                rsgl->sgl.sgt.orig_nents = 0;
@@ -1254,8 +1257,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
                }
 
                sg_mark_end(rsgl->sgl.sgt.sgl + rsgl->sgl.sgt.nents - 1);
-               rsgl->sgl.need_unpin =
-                       iov_iter_extract_will_pin(&msg->msg_iter);
 
                /* chain the new scatterlist with previous one */
                if (areq->last_rsgl)
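
ctx->merge holds the fill level of the last scatterlist page modulo PAGE_SIZE; nonzero means a later sendmsg() may append to that page. The hunks above keep it at zero for spliced-in pages (which must not be written into) and only compute it after the copy path has filled fresh pages. A worked example of the masking arithmetic, values invented:

#include <stdio.h>

#define PAGE_SIZE 4096

int main(void)
{
	unsigned long plen = 5000;	/* bytes just copied in */

	/* 5000 = 4096 + 904, so 904 bytes of the last page are in use */
	printf("merge = %lu\n", plen & (PAGE_SIZE - 1));	/* 904 */
	return 0;
}
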
index 0ab43e1..82c44d4 100644 (file)
@@ -68,13 +68,15 @@ static int hash_sendmsg(struct socket *sock, struct msghdr *msg,
        struct hash_ctx *ctx = ask->private;
        ssize_t copied = 0;
        size_t len, max_pages, npages;
-       bool continuing = ctx->more, need_init = false;
+       bool continuing, need_init = false;
        int err;
 
        max_pages = min_t(size_t, ALG_MAX_PAGES,
                          DIV_ROUND_UP(sk->sk_sndbuf, PAGE_SIZE));
 
        lock_sock(sk);
+       continuing = ctx->more;
+
        if (!continuing) {
                /* Discard a previous request that wasn't marked MSG_MORE. */
                hash_free_result(sk, ctx);
index e787598..773e159 100644 (file)
@@ -185,8 +185,10 @@ static int software_key_query(const struct kernel_pkey_params *params,
 
        if (issig) {
                sig = crypto_alloc_sig(alg_name, 0, 0);
-               if (IS_ERR(sig))
+               if (IS_ERR(sig)) {
+                       ret = PTR_ERR(sig);
                        goto error_free_key;
+               }
 
                if (pkey->key_is_private)
                        ret = crypto_sig_set_privkey(sig, key, pkey->keylen);
@@ -208,8 +210,10 @@ static int software_key_query(const struct kernel_pkey_params *params,
                }
        } else {
                tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-               if (IS_ERR(tfm))
+               if (IS_ERR(tfm)) {
+                       ret = PTR_ERR(tfm);
                        goto error_free_key;
+               }
 
                if (pkey->key_is_private)
                        ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen);
@@ -300,8 +304,10 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
 
        if (issig) {
                sig = crypto_alloc_sig(alg_name, 0, 0);
-               if (IS_ERR(sig))
+               if (IS_ERR(sig)) {
+                       ret = PTR_ERR(sig);
                        goto error_free_key;
+               }
 
                if (pkey->key_is_private)
                        ret = crypto_sig_set_privkey(sig, key, pkey->keylen);
@@ -313,8 +319,10 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
                ksz = crypto_sig_maxsize(sig);
        } else {
                tfm = crypto_alloc_akcipher(alg_name, 0, 0);
-               if (IS_ERR(tfm))
+               if (IS_ERR(tfm)) {
+                       ret = PTR_ERR(tfm);
                        goto error_free_key;
+               }
 
                if (pkey->key_is_private)
                        ret = crypto_akcipher_set_priv_key(tfm, key, pkey->keylen);
@@ -411,8 +419,10 @@ int public_key_verify_signature(const struct public_key *pkey,
 
        key = kmalloc(pkey->keylen + sizeof(u32) * 2 + pkey->paramlen,
                      GFP_KERNEL);
-       if (!key)
+       if (!key) {
+               ret = -ENOMEM;
                goto error_free_tfm;
+       }
 
        memcpy(key, pkey->key, pkey->keylen);
        ptr = key + pkey->keylen;
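
All four hunks in this file repair the same pattern: on an allocation or IS_ERR() failure the code jumped to the unwind label while ret still held the value of an earlier, successful step, so callers could observe success for a failed operation. A compact illustration with hypothetical names:

#include <stdio.h>
#include <stdlib.h>

static void *alloc_key(size_t len) { (void)len; return NULL; }	/* simulated failure */

static int do_op(void)
{
	int ret = 0;			/* left over from a successful step */
	void *key = alloc_key(32);

	if (!key) {
		ret = -12;		/* -ENOMEM: the added line; without it
					 * the caller would see ret == 0 */
		goto error_unwind;
	}
	free(key);
error_unwind:
	return ret;
}

int main(void) { printf("%d\n", do_op()); return 0; }
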
index 7241d80..a7459e7 100644 (file)
@@ -195,3 +195,5 @@ obj-$(CONFIG_PECI)          += peci/
 obj-$(CONFIG_HTE)              += hte/
 obj-$(CONFIG_DRM_ACCEL)                += accel/
 obj-$(CONFIG_CDX_BUS)          += cdx/
+
+obj-$(CONFIG_S390)             += s390/
index d92ba2e..2f027d5 100644 (file)
@@ -3980,6 +3980,15 @@ static inline void hl_debugfs_fini(void)
 {
 }
 
+static inline int hl_debugfs_device_init(struct hl_device *hdev)
+{
+       return 0;
+}
+
+static inline void hl_debugfs_device_fini(struct hl_device *hdev)
+{
+}
+
 static inline void hl_debugfs_add_device(struct hl_device *hdev)
 {
 }
index d3013fb..399dc5d 100644 (file)
@@ -75,6 +75,7 @@ struct ivpu_wa_table {
        bool punit_disabled;
        bool clear_runtime_mem;
        bool d3hot_after_power_off;
+       bool interrupt_clear_with_0;
 };
 
 struct ivpu_hw_info;
index 52b339a..9967fcf 100644 (file)
@@ -173,6 +173,9 @@ static void internal_free_pages_locked(struct ivpu_bo *bo)
 {
        unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
 
+       if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+               set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
        for (i = 0; i < npages; i++)
                put_page(bo->pages[i]);
 
@@ -587,6 +590,11 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
        if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
                drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
 
+       if (bo->flags & DRM_IVPU_BO_WC)
+               set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
+       else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+               set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
+
        prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
        bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
        if (!bo->kvaddr) {
index fef3542..2a5dd3a 100644 (file)
@@ -101,6 +101,9 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
        vdev->wa.punit_disabled = ivpu_is_fpga(vdev);
        vdev->wa.clear_runtime_mem = false;
        vdev->wa.d3hot_after_power_off = true;
+
+       if (ivpu_device_id(vdev) == PCI_DEVICE_ID_MTL && ivpu_revision(vdev) < 4)
+               vdev->wa.interrupt_clear_with_0 = true;
 }
 
 static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
@@ -885,7 +888,7 @@ static void ivpu_hw_mtl_irq_disable(struct ivpu_device *vdev)
        REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x1);
        REGB_WR32(MTL_BUTTRESS_LOCAL_INT_MASK, BUTTRESS_IRQ_DISABLE_MASK);
        REGV_WR64(MTL_VPU_HOST_SS_ICB_ENABLE_0, 0x0ull);
-       REGB_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, 0x0);
+       REGV_WR32(MTL_VPU_HOST_SS_FW_SOC_IRQ_EN, 0x0);
 }
 
 static void ivpu_hw_mtl_irq_wdt_nce_handler(struct ivpu_device *vdev)
@@ -973,12 +976,15 @@ static u32 ivpu_hw_mtl_irqb_handler(struct ivpu_device *vdev, int irq)
                schedule_recovery = true;
        }
 
-       /*
-        * Clear local interrupt status by writing 0 to all bits.
-        * This must be done after interrupts are cleared at the source.
-        * Writing 1 triggers an interrupt, so we can't perform read update write.
-        */
-       REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, 0x0);
+       /* This must be done after interrupts are cleared at the source. */
+       if (IVPU_WA(interrupt_clear_with_0))
+               /*
+                * Writing 1 triggers an interrupt, so we can't perform read update write.
+                * Clear local interrupt status by writing 0 to all bits.
+                */
+               REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, 0x0);
+       else
+               REGB_WR32(MTL_BUTTRESS_INTERRUPT_STAT, status);
 
        /* Re-enable global interrupt */
        REGB_WR32(MTL_BUTTRESS_GLOBAL_INT_MASK, 0x0);
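
Two conventions for clearing an interrupt status register are in play above: on early MTL steppings the buttress register is cleared by writing 0 (writing 1 re-triggers an interrupt), while fixed steppings use the usual write-1-to-clear scheme, acknowledging only the bits just handled. A schematic of the split, with a plain variable standing in for the MMIO register:

#include <stdint.h>

static volatile uint32_t int_stat;	/* stand-in for the status register */

static void clear_irq_status(uint32_t handled, int clear_with_0)
{
	if (clear_with_0)
		int_stat = 0;		/* quirky parts: any 1 re-triggers */
	else
		int_stat = handled;	/* W1C: ack only the handled bits */
}
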
index 5c57f7b..388abd4 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/mm.h>
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
+#include <linux/overflow.h>
 #include <linux/pci.h>
 #include <linux/scatterlist.h>
 #include <linux/types.h>
@@ -366,7 +367,7 @@ static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrap
        if (in_trans->hdr.len % 8 != 0)
                return -EINVAL;
 
-       if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_EXT_MSG_LENGTH)
+       if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_EXT_MSG_LENGTH)
                return -ENOSPC;
 
        trans_wrapper = add_wrapper(wrappers,
@@ -391,18 +392,31 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
                                   struct qaic_manage_trans_dma_xfer *in_trans,
                                   struct ioctl_resources *resources, struct dma_xfer *xfer)
 {
+       u64 xfer_start_addr, remaining, end, total;
        unsigned long need_pages;
        struct page **page_list;
        unsigned long nr_pages;
        struct sg_table *sgt;
-       u64 xfer_start_addr;
        int ret;
        int i;
 
-       xfer_start_addr = in_trans->addr + resources->xferred_dma_size;
+       if (check_add_overflow(in_trans->addr, resources->xferred_dma_size, &xfer_start_addr))
+               return -EINVAL;
 
-       need_pages = DIV_ROUND_UP(in_trans->size + offset_in_page(xfer_start_addr) -
-                                 resources->xferred_dma_size, PAGE_SIZE);
+       if (in_trans->size < resources->xferred_dma_size)
+               return -EINVAL;
+       remaining = in_trans->size - resources->xferred_dma_size;
+       if (remaining == 0)
+               return 0;
+
+       if (check_add_overflow(xfer_start_addr, remaining, &end))
+               return -EINVAL;
+
+       total = remaining + offset_in_page(xfer_start_addr);
+       if (total >= SIZE_MAX)
+               return -EINVAL;
+
+       need_pages = DIV_ROUND_UP(total, PAGE_SIZE);
 
        nr_pages = need_pages;
 
@@ -418,9 +432,12 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
        }
 
        ret = get_user_pages_fast(xfer_start_addr, nr_pages, 0, page_list);
-       if (ret < 0 || ret != nr_pages) {
-               ret = -EFAULT;
+       if (ret < 0)
                goto free_page_list;
+       if (ret != nr_pages) {
+               nr_pages = ret;
+               ret = -EFAULT;
+               goto put_pages;
        }
 
        sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
@@ -431,7 +448,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
 
        ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages,
                                        offset_in_page(xfer_start_addr),
-                                       in_trans->size - resources->xferred_dma_size, GFP_KERNEL);
+                                       remaining, GFP_KERNEL);
        if (ret) {
                ret = -ENOMEM;
                goto free_sgt;
@@ -557,17 +574,11 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list
        msg = &wrapper->msg;
        msg_hdr_len = le32_to_cpu(msg->hdr.len);
 
-       if (msg_hdr_len > (UINT_MAX - QAIC_MANAGE_EXT_MSG_LENGTH))
-               return -EINVAL;
-
        /* There should be enough space to hold at least one ASP entry. */
-       if (msg_hdr_len + sizeof(*out_trans) + sizeof(struct wire_addr_size_pair) >
+       if (size_add(msg_hdr_len, sizeof(*out_trans) + sizeof(struct wire_addr_size_pair)) >
            QAIC_MANAGE_EXT_MSG_LENGTH)
                return -ENOMEM;
 
-       if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size)
-               return -EINVAL;
-
        xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
        if (!xfer)
                return -ENOMEM;
@@ -634,7 +645,7 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
        msg = &wrapper->msg;
        msg_hdr_len = le32_to_cpu(msg->hdr.len);
 
-       if (msg_hdr_len + sizeof(*out_trans) > QAIC_MANAGE_MAX_MSG_LENGTH)
+       if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_MAX_MSG_LENGTH)
                return -ENOSPC;
 
        if (!in_trans->queue_size)
@@ -718,7 +729,7 @@ static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_l
        msg = &wrapper->msg;
        msg_hdr_len = le32_to_cpu(msg->hdr.len);
 
-       if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_MAX_MSG_LENGTH)
+       if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_MAX_MSG_LENGTH)
                return -ENOSPC;
 
        trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper));
@@ -748,7 +759,8 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
        int ret;
        int i;
 
-       if (!user_msg->count) {
+       if (!user_msg->count ||
+           user_msg->len < sizeof(*trans_hdr)) {
                ret = -EINVAL;
                goto out;
        }
@@ -765,12 +777,13 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
        }
 
        for (i = 0; i < user_msg->count; ++i) {
-               if (user_len >= user_msg->len) {
+               if (user_len > user_msg->len - sizeof(*trans_hdr)) {
                        ret = -EINVAL;
                        break;
                }
                trans_hdr = (struct qaic_manage_trans_hdr *)(user_msg->data + user_len);
-               if (user_len + trans_hdr->len > user_msg->len) {
+               if (trans_hdr->len < sizeof(trans_hdr) ||
+                   size_add(user_len, trans_hdr->len) > user_msg->len) {
                        ret = -EINVAL;
                        break;
                }
@@ -953,15 +966,23 @@ static int decode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
        int ret;
        int i;
 
-       if (msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH)
+       if (msg_hdr_len < sizeof(*trans_hdr) ||
+           msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH)
                return -EINVAL;
 
        user_msg->len = 0;
        user_msg->count = le32_to_cpu(msg->hdr.count);
 
        for (i = 0; i < user_msg->count; ++i) {
+               u32 hdr_len;
+
+               if (msg_len > msg_hdr_len - sizeof(*trans_hdr))
+                       return -EINVAL;
+
                trans_hdr = (struct wire_trans_hdr *)(msg->data + msg_len);
-               if (msg_len + le32_to_cpu(trans_hdr->len) > msg_hdr_len)
+               hdr_len = le32_to_cpu(trans_hdr->len);
+               if (hdr_len < sizeof(*trans_hdr) ||
+                   size_add(msg_len, hdr_len) > msg_hdr_len)
                        return -EINVAL;
 
                switch (le32_to_cpu(trans_hdr->type)) {
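
Each of these checks rewrites an add-then-compare into a form that cannot wrap: check_add_overflow() rejects the transfer outright, and size_add() from <linux/overflow.h> saturates at SIZE_MAX so the following '>' comparison trips. A userspace sketch of the wrap hazard, built on the same compiler builtin those helpers use:

#include <stdint.h>
#include <stdio.h>

#define MAX_MSG_LEN 1024u

static int check_len(uint32_t hdr_len, uint32_t trans_len)
{
	uint32_t total;

	/* "hdr_len + trans_len > MAX" is unsafe: the sum can wrap to a
	 * small value and slip past the bound. Detect the wrap instead. */
	if (__builtin_add_overflow(hdr_len, trans_len, &total))
		return -1;
	return total > MAX_MSG_LEN ? -1 : 0;
}

int main(void)
{
	printf("%d\n", check_len(0xFFFFFFF0u, 0x20u));	/* -1: wrapped */
	printf("%d\n", check_len(100, 200));		/* 0: fits */
	return 0;
}
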
index e9a1cb7..6b6d981 100644 (file)
@@ -1021,6 +1021,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
        bo->dbc = dbc;
        srcu_read_unlock(&dbc->ch_lock, rcu_id);
        drm_gem_object_put(obj);
+       kfree(slice_ent);
        srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
        srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
 
index 00dd309..cee82b4 100644 (file)
@@ -581,7 +581,7 @@ config ACPI_VIOT
 
 config ACPI_PRMT
        bool "Platform Runtime Mechanism Support"
-       depends on EFI && (X86_64 || ARM64)
+       depends on EFI_RUNTIME_WRAPPERS && (X86_64 || ARM64)
        default y
        help
          Platform Runtime Mechanism (PRM) is a firmware interface exposing a
index 3fc5a0d..eaa09bf 100644 (file)
@@ -50,7 +50,6 @@ acpi-$(CONFIG_PCI)            += acpi_lpss.o
 acpi-y                         += acpi_apd.o
 acpi-y                         += acpi_platform.o
 acpi-y                         += acpi_pnp.o
-acpi-$(CONFIG_ARM_AMBA)        += acpi_amba.o
 acpi-y                         += power.o
 acpi-y                         += event.o
 acpi-y                         += evged.o
index 1ace70b..225dc68 100644 (file)
@@ -34,7 +34,7 @@ MODULE_LICENSE("GPL");
 
 static int acpi_ac_add(struct acpi_device *device);
 static void acpi_ac_remove(struct acpi_device *device);
-static void acpi_ac_notify(struct acpi_device *device, u32 event);
+static void acpi_ac_notify(acpi_handle handle, u32 event, void *data);
 
 static const struct acpi_device_id ac_device_ids[] = {
        {"ACPI0003", 0},
@@ -54,11 +54,9 @@ static struct acpi_driver acpi_ac_driver = {
        .name = "ac",
        .class = ACPI_AC_CLASS,
        .ids = ac_device_ids,
-       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = acpi_ac_add,
                .remove = acpi_ac_remove,
-               .notify = acpi_ac_notify,
                },
        .drv.pm = &acpi_ac_pm,
 };
@@ -128,8 +126,9 @@ static enum power_supply_property ac_props[] = {
 };
 
 /* Driver Model */
-static void acpi_ac_notify(struct acpi_device *device, u32 event)
+static void acpi_ac_notify(acpi_handle handle, u32 event, void *data)
 {
+       struct acpi_device *device = data;
        struct acpi_ac *ac = acpi_driver_data(device);
 
        if (!ac)
@@ -235,7 +234,7 @@ static int acpi_ac_add(struct acpi_device *device)
 
        result = acpi_ac_get_state(ac);
        if (result)
-               goto end;
+               goto err_release_ac;
 
        psy_cfg.drv_data = ac;
 
@@ -248,7 +247,7 @@ static int acpi_ac_add(struct acpi_device *device)
                                            &ac->charger_desc, &psy_cfg);
        if (IS_ERR(ac->charger)) {
                result = PTR_ERR(ac->charger);
-               goto end;
+               goto err_release_ac;
        }
 
        pr_info("%s [%s] (%s)\n", acpi_device_name(device),
@@ -256,9 +255,19 @@ static int acpi_ac_add(struct acpi_device *device)
 
        ac->battery_nb.notifier_call = acpi_ac_battery_notify;
        register_acpi_notifier(&ac->battery_nb);
-end:
+
+       result = acpi_dev_install_notify_handler(device, ACPI_ALL_NOTIFY,
+                                                acpi_ac_notify);
        if (result)
-               kfree(ac);
+               goto err_unregister;
+
+       return 0;
+
+err_unregister:
+       power_supply_unregister(ac->charger);
+       unregister_acpi_notifier(&ac->battery_nb);
+err_release_ac:
+       kfree(ac);
 
        return result;
 }
@@ -297,6 +306,8 @@ static void acpi_ac_remove(struct acpi_device *device)
 
        ac = acpi_driver_data(device);
 
+       acpi_dev_remove_notify_handler(device, ACPI_ALL_NOTIFY,
+                                      acpi_ac_notify);
        power_supply_unregister(ac->charger);
        unregister_acpi_notifier(&ac->battery_nb);
 
index 4cf4aef..9b55d15 100644 (file)
@@ -51,12 +51,11 @@ acpi_cmos_rtc_space_handler(u32 function, acpi_physical_address address,
        return AE_OK;
 }
 
-static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev,
-               const struct acpi_device_id *id)
+int acpi_install_cmos_rtc_space_handler(acpi_handle handle)
 {
        acpi_status status;
 
-       status = acpi_install_address_space_handler(adev->handle,
+       status = acpi_install_address_space_handler(handle,
                        ACPI_ADR_SPACE_CMOS,
                        &acpi_cmos_rtc_space_handler,
                        NULL, NULL);
@@ -67,18 +66,30 @@ static int acpi_install_cmos_rtc_space_handler(struct acpi_device *adev,
 
        return 1;
 }
+EXPORT_SYMBOL_GPL(acpi_install_cmos_rtc_space_handler);
 
-static void acpi_remove_cmos_rtc_space_handler(struct acpi_device *adev)
+void acpi_remove_cmos_rtc_space_handler(acpi_handle handle)
 {
-       if (ACPI_FAILURE(acpi_remove_address_space_handler(adev->handle,
+       if (ACPI_FAILURE(acpi_remove_address_space_handler(handle,
                        ACPI_ADR_SPACE_CMOS, &acpi_cmos_rtc_space_handler)))
                pr_err("Error removing CMOS-RTC region handler\n");
 }
+EXPORT_SYMBOL_GPL(acpi_remove_cmos_rtc_space_handler);
+
+static int acpi_cmos_rtc_attach_handler(struct acpi_device *adev, const struct acpi_device_id *id)
+{
+       return acpi_install_cmos_rtc_space_handler(adev->handle);
+}
+
+static void acpi_cmos_rtc_detach_handler(struct acpi_device *adev)
+{
+       acpi_remove_cmos_rtc_space_handler(adev->handle);
+}
 
 static struct acpi_scan_handler cmos_rtc_handler = {
        .ids = acpi_cmos_rtc_ids,
-       .attach = acpi_install_cmos_rtc_space_handler,
-       .detach = acpi_remove_cmos_rtc_space_handler,
+       .attach = acpi_cmos_rtc_attach_handler,
+       .detach = acpi_cmos_rtc_detach_handler,
 };
 
 void __init acpi_cmos_rtc_init(void)
index e648158..e120a96 100644 (file)
@@ -172,7 +172,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
                        fru_text = "";
                sec_type = (guid_t *)gdata->section_type;
                if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
-                       struct cper_sec_mem_err *mem = (void *)(gdata + 1);
+                       struct cper_sec_mem_err *mem = acpi_hest_get_payload(gdata);
 
                        if (gdata->error_data_length >= sizeof(*mem))
                                trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
index f9aa02c..c711db8 100644 (file)
@@ -9,9 +9,11 @@
  * Copyright (C) 2013, Intel Corporation
  *                     Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  */
+#define pr_fmt(fmt) "ACPI: " fmt
 
 #include <linux/acpi.h>
 #include <linux/device.h>
+#include <linux/dmi.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -21,6 +23,8 @@
 
 #include <asm/cpu.h>
 
+#include <xen/xen.h>
+
 #include "internal.h"
 
 DEFINE_PER_CPU(struct acpi_processor *, processors);
@@ -508,54 +512,110 @@ static void acpi_processor_remove(struct acpi_device *device)
 }
 #endif /* CONFIG_ACPI_HOTPLUG_CPU */
 
-#ifdef CONFIG_X86
-static bool acpi_hwp_native_thermal_lvt_set;
-static acpi_status __init acpi_hwp_native_thermal_lvt_osc(acpi_handle handle,
-                                                         u32 lvl,
-                                                         void *context,
-                                                         void **rv)
+#ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
+bool __init processor_physically_present(acpi_handle handle)
+{
+       int cpuid, type;
+       u32 acpi_id;
+       acpi_status status;
+       acpi_object_type acpi_type;
+       unsigned long long tmp;
+       union acpi_object object = {};
+       struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+       status = acpi_get_type(handle, &acpi_type);
+       if (ACPI_FAILURE(status))
+               return false;
+
+       switch (acpi_type) {
+       case ACPI_TYPE_PROCESSOR:
+               status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+               if (ACPI_FAILURE(status))
+                       return false;
+               acpi_id = object.processor.proc_id;
+               break;
+       case ACPI_TYPE_DEVICE:
+               status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+                                              NULL, &tmp);
+               if (ACPI_FAILURE(status))
+                       return false;
+               acpi_id = tmp;
+               break;
+       default:
+               return false;
+       }
+
+       if (xen_initial_domain())
+               /*
+                * When running as a Xen dom0 the number of processors Linux
+                * sees can be different from the real number of processors on
+                * the system, and we still need to execute _PDC or _OSC for
+                * all of them.
+                */
+               return xen_processor_present(acpi_id);
+
+       type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
+       cpuid = acpi_get_cpuid(handle, type, acpi_id);
+
+       return !invalid_logical_cpuid(cpuid);
+}
+
+/* vendor specific UUID indicating an Intel platform */
+static u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
+
+static acpi_status __init acpi_processor_osc(acpi_handle handle, u32 lvl,
+                                            void *context, void **rv)
 {
-       u8 sb_uuid_str[] = "4077A616-290C-47BE-9EBD-D87058713953";
-       u32 capbuf[2];
+       u32 capbuf[2] = {};
        struct acpi_osc_context osc_context = {
                .uuid_str = sb_uuid_str,
                .rev = 1,
                .cap.length = 8,
                .cap.pointer = capbuf,
        };
+       acpi_status status;
 
-       if (acpi_hwp_native_thermal_lvt_set)
-               return AE_CTRL_TERMINATE;
+       if (!processor_physically_present(handle))
+               return AE_OK;
 
-       capbuf[0] = 0x0000;
-       capbuf[1] = 0x1000; /* set bit 12 */
+       arch_acpi_set_proc_cap_bits(&capbuf[OSC_SUPPORT_DWORD]);
 
-       if (ACPI_SUCCESS(acpi_run_osc(handle, &osc_context))) {
-               if (osc_context.ret.pointer && osc_context.ret.length > 1) {
-                       u32 *capbuf_ret = osc_context.ret.pointer;
+       status = acpi_run_osc(handle, &osc_context);
+       if (ACPI_FAILURE(status))
+               return status;
 
-                       if (capbuf_ret[1] & 0x1000) {
-                               acpi_handle_info(handle,
-                                       "_OSC native thermal LVT Acked\n");
-                               acpi_hwp_native_thermal_lvt_set = true;
-                       }
-               }
-               kfree(osc_context.ret.pointer);
-       }
+       kfree(osc_context.ret.pointer);
 
        return AE_OK;
 }
 
-void __init acpi_early_processor_osc(void)
+static bool __init acpi_early_processor_osc(void)
 {
-       if (boot_cpu_has(X86_FEATURE_HWP)) {
-               acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-                                   ACPI_UINT32_MAX,
-                                   acpi_hwp_native_thermal_lvt_osc,
-                                   NULL, NULL, NULL);
-               acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID,
-                                acpi_hwp_native_thermal_lvt_osc,
-                                NULL, NULL);
+       acpi_status status;
+
+       acpi_proc_quirk_mwait_check();
+
+       status = acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+                                    ACPI_UINT32_MAX, acpi_processor_osc, NULL,
+                                    NULL, NULL);
+       if (ACPI_FAILURE(status))
+               return false;
+
+       status = acpi_get_devices(ACPI_PROCESSOR_DEVICE_HID, acpi_processor_osc,
+                                 NULL, NULL);
+       if (ACPI_FAILURE(status))
+               return false;
+
+       return true;
+}
+
+void __init acpi_early_processor_control_setup(void)
+{
+       if (acpi_early_processor_osc()) {
+               pr_info("_OSC evaluated successfully for all CPUs\n");
+       } else {
+               pr_info("_OSC evaluation for CPUs failed, trying _PDC\n");
+               acpi_early_processor_set_pdc();
        }
 }
 #endif
index e9b8e83..33c3b16 100644 (file)
@@ -557,6 +557,7 @@ static int acpi_tad_disable_timer(struct device *dev, u32 timer_id)
 static int acpi_tad_remove(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
+       acpi_handle handle = ACPI_HANDLE(dev);
        struct acpi_tad_driver_data *dd = dev_get_drvdata(dev);
 
        device_init_wakeup(dev, false);
@@ -577,6 +578,7 @@ static int acpi_tad_remove(struct platform_device *pdev)
 
        pm_runtime_put_sync(dev);
        pm_runtime_disable(dev);
+       acpi_remove_cmos_rtc_space_handler(handle);
        return 0;
 }
 
@@ -589,6 +591,11 @@ static int acpi_tad_probe(struct platform_device *pdev)
        unsigned long long caps;
        int ret;
 
+       ret = acpi_install_cmos_rtc_space_handler(handle);
+       if (ret < 0) {
+               dev_info(dev, "Unable to install space handler\n");
+               return -ENODEV;
+       }
        /*
         * Initialization failure messages are mostly about firmware issues, so
         * print them at the "info" level.
@@ -596,22 +603,27 @@ static int acpi_tad_probe(struct platform_device *pdev)
        status = acpi_evaluate_integer(handle, "_GCP", NULL, &caps);
        if (ACPI_FAILURE(status)) {
                dev_info(dev, "Unable to get capabilities\n");
-               return -ENODEV;
+               ret = -ENODEV;
+               goto remove_handler;
        }
 
        if (!(caps & ACPI_TAD_AC_WAKE)) {
                dev_info(dev, "Unsupported capabilities\n");
-               return -ENODEV;
+               ret = -ENODEV;
+               goto remove_handler;
        }
 
        if (!acpi_has_method(handle, "_PRW")) {
                dev_info(dev, "Missing _PRW\n");
-               return -ENODEV;
+               ret = -ENODEV;
+               goto remove_handler;
        }
 
        dd = devm_kzalloc(dev, sizeof(*dd), GFP_KERNEL);
-       if (!dd)
-               return -ENOMEM;
+       if (!dd) {
+               ret = -ENOMEM;
+               goto remove_handler;
+       }
 
        dd->capabilities = caps;
        dev_set_drvdata(dev, dd);
@@ -653,6 +665,11 @@ static int acpi_tad_probe(struct platform_device *pdev)
 
 fail:
        acpi_tad_remove(pdev);
+       /* Don't fall through because the CMOS RTC space handler is removed in acpi_tad_remove() */
+       return ret;
+
+remove_handler:
+       acpi_remove_cmos_rtc_space_handler(handle);
        return ret;
 }
 
index 62f4364..948e31f 100644 (file)
@@ -77,7 +77,7 @@ static DEFINE_MUTEX(video_list_lock);
 static LIST_HEAD(video_bus_head);
 static int acpi_video_bus_add(struct acpi_device *device);
 static void acpi_video_bus_remove(struct acpi_device *device);
-static void acpi_video_bus_notify(struct acpi_device *device, u32 event);
+static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data);
 
 /*
  * Indices in the _BCL method response: the first two items are special,
@@ -104,7 +104,6 @@ static struct acpi_driver acpi_video_bus = {
        .ops = {
                .add = acpi_video_bus_add,
                .remove = acpi_video_bus_remove,
-               .notify = acpi_video_bus_notify,
                },
 };
 
@@ -1527,8 +1526,9 @@ static int acpi_video_bus_stop_devices(struct acpi_video_bus *video)
                                  acpi_osi_is_win8() ? 0 : 1);
 }
 
-static void acpi_video_bus_notify(struct acpi_device *device, u32 event)
+static void acpi_video_bus_notify(acpi_handle handle, u32 event, void *data)
 {
+       struct acpi_device *device = data;
        struct acpi_video_bus *video = acpi_driver_data(device);
        struct input_dev *input;
        int keycode = 0;
@@ -2027,6 +2027,12 @@ static int acpi_video_bus_add(struct acpi_device *device)
        if (error)
                goto err_put_video;
 
+       /*
+        * HP ZBook Fury 16 G10 requires ACPI video's child devices have _PS0
+        * evaluated to have functional panel brightness control.
+        */
+       acpi_device_fix_up_power_extended(device);
+
        pr_info("%s [%s] (multi-head: %s  rom: %s  post: %s)\n",
               ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device),
               video->flags.multihead ? "yes" : "no",
@@ -2053,8 +2059,19 @@ static int acpi_video_bus_add(struct acpi_device *device)
 
        acpi_video_bus_add_notify_handler(video);
 
+       error = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY,
+                                               acpi_video_bus_notify);
+       if (error)
+               goto err_remove;
+
        return 0;
 
+err_remove:
+       mutex_lock(&video_list_lock);
+       list_del(&video->entry);
+       mutex_unlock(&video_list_lock);
+       acpi_video_bus_remove_notify_handler(video);
+       acpi_video_bus_unregister_backlight(video);
 err_put_video:
        acpi_video_bus_put_devices(video);
        kfree(video->attached_array);
@@ -2075,6 +2092,9 @@ static void acpi_video_bus_remove(struct acpi_device *device)
 
        video = acpi_driver_data(device);
 
+       acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY,
+                                      acpi_video_bus_notify);
+
        mutex_lock(&video_list_lock);
        list_del(&video->entry);
        mutex_unlock(&video_list_lock);
index 22f1f7a..911875c 100644 (file)
@@ -287,4 +287,6 @@ struct acpi_namespace_node *acpi_db_local_ns_lookup(char *name);
 
 void acpi_db_uint32_to_hex_string(u32 value, char *buffer);
 
+void acpi_db_generate_interrupt(char *gsiv_arg);
+
 #endif                         /* __ACDEBUG_H__ */
index 7782411..f4c90fc 100644 (file)
@@ -129,6 +129,7 @@ ACPI_GLOBAL(acpi_table_handler, acpi_gbl_table_handler);
 ACPI_GLOBAL(void *, acpi_gbl_table_handler_context);
 ACPI_GLOBAL(acpi_interface_handler, acpi_gbl_interface_handler);
 ACPI_GLOBAL(struct acpi_sci_handler_info *, acpi_gbl_sci_handler_list);
+ACPI_GLOBAL(struct acpi_ged_handler_info *, acpi_gbl_ged_handler_list);
 
 /* Owner ID support */
 
index 12d4a02..82563b4 100644 (file)
@@ -543,6 +543,14 @@ struct acpi_field_info {
        u32 pkg_length;
 };
 
+/* Information about the interrupt ID and _EVT of a GED device */
+
+struct acpi_ged_handler_info {
+       struct acpi_ged_handler_info *next;
+       u32 int_id;             /* The interrupt ID that triggers the execution of the evt_method. */
+       struct acpi_namespace_node *evt_method; /* The _EVT method to be executed when an interrupt with ID = int_id is received */
+};
+
 /*****************************************************************************
  *
  * Generic "state" object for stacks
@@ -560,25 +568,28 @@ struct acpi_field_info {
        u8                              descriptor_type; /* To differentiate various internal objs */\
        u8                              flags; \
        u16                             value; \
-       u16                             state;
+       u16                             state
 
        /* There are 2 bytes available here until the next natural alignment boundary */
 
 struct acpi_common_state {
-ACPI_STATE_COMMON};
+       ACPI_STATE_COMMON;
+};
 
 /*
  * Update state - used to traverse complex objects such as packages
  */
 struct acpi_update_state {
-       ACPI_STATE_COMMON union acpi_operand_object *object;
+       ACPI_STATE_COMMON;
+       union acpi_operand_object *object;
 };
 
 /*
  * Pkg state - used to traverse nested package structures
  */
 struct acpi_pkg_state {
-       ACPI_STATE_COMMON u32 index;
+       ACPI_STATE_COMMON;
+       u32 index;
        union acpi_operand_object *source_object;
        union acpi_operand_object *dest_object;
        struct acpi_walk_state *walk_state;
@@ -591,7 +602,8 @@ struct acpi_pkg_state {
  * Allows nesting of these constructs
  */
 struct acpi_control_state {
-       ACPI_STATE_COMMON u16 opcode;
+       ACPI_STATE_COMMON;
+       u16 opcode;
        union acpi_parse_object *predicate_op;
        u8 *aml_predicate_start;        /* Start of if/while predicate */
        u8 *package_end;        /* End of if/while block */
@@ -602,11 +614,13 @@ struct acpi_control_state {
  * Scope state - current scope during namespace lookups
  */
 struct acpi_scope_state {
-       ACPI_STATE_COMMON struct acpi_namespace_node *node;
+       ACPI_STATE_COMMON;
+       struct acpi_namespace_node *node;
 };
 
 struct acpi_pscope_state {
-       ACPI_STATE_COMMON u32 arg_count;        /* Number of fixed arguments */
+       ACPI_STATE_COMMON;
+       u32 arg_count;          /* Number of fixed arguments */
        union acpi_parse_object *op;    /* Current op being parsed */
        u8 *arg_end;            /* Current argument end */
        u8 *pkg_end;            /* Current package end */
@@ -618,7 +632,8 @@ struct acpi_pscope_state {
  * states are created when there are nested control methods executing.
  */
 struct acpi_thread_state {
-       ACPI_STATE_COMMON u8 current_sync_level;        /* Mutex Sync (nested acquire) level */
+       ACPI_STATE_COMMON;
+       u8 current_sync_level;  /* Mutex Sync (nested acquire) level */
        struct acpi_walk_state *walk_state_list;        /* Head of list of walk_states for this thread */
        union acpi_operand_object *acquired_mutex_list; /* List of all currently acquired mutexes */
        acpi_thread_id thread_id;       /* Running thread ID */
@@ -629,8 +644,8 @@ struct acpi_thread_state {
  * AML arguments
  */
 struct acpi_result_values {
-       ACPI_STATE_COMMON
-           union acpi_operand_object *obj_desc[ACPI_RESULTS_FRAME_OBJ_NUM];
+       ACPI_STATE_COMMON;
+       union acpi_operand_object *obj_desc[ACPI_RESULTS_FRAME_OBJ_NUM];
 };
 
 typedef
@@ -652,7 +667,8 @@ struct acpi_global_notify_handler {
  * handler/dispatcher.
  */
 struct acpi_notify_info {
-       ACPI_STATE_COMMON u8 handler_list_id;
+       ACPI_STATE_COMMON;
+       u8 handler_list_id;
        struct acpi_namespace_node *node;
        union acpi_operand_object *handler_list_head;
        struct acpi_global_notify_handler *global;
index e64aabe..2e442f5 100644 (file)
@@ -440,6 +440,9 @@ const union acpi_predefined_info acpi_gbl_predefined_methods[] = {
        {{"_DOS", METHOD_1ARGS(ACPI_TYPE_INTEGER),
          METHOD_NO_RETURN_VALUE}},
 
+       {{"_DSC", METHOD_0ARGS,
+         METHOD_RETURNS(ACPI_RTYPE_INTEGER)}},
+
        {{"_DSD", METHOD_0ARGS, /* ACPI 6.0 */
          METHOD_RETURNS(ACPI_RTYPE_PACKAGE)}}, /* Variable-length (Pkgs) each: 1 Buf, 1 Pkg */
        PACKAGE_INFO(ACPI_PTYPE2_UUID_PAIR, ACPI_RTYPE_BUFFER, 1,
index 9eb68e0..3d99a90 100644 (file)
@@ -1010,6 +1010,64 @@ void acpi_db_display_resources(char *object_arg)
        acpi_db_set_output_destination(ACPI_DB_CONSOLE_OUTPUT);
 }
 
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_db_generate_interrupt
+ *
+ * PARAMETERS:  gsiv_arg            - Raw GSIV number, ascii string
+ *
+ * RETURN:      None
+ *
+ * DESCRIPTION: Simulate firing of a GED interrupt
+ *
+ ******************************************************************************/
+
+void acpi_db_generate_interrupt(char *gsiv_arg)
+{
+       u32 gsiv_number;
+       struct acpi_ged_handler_info *ged_info = acpi_gbl_ged_handler_list;
+
+       if (!ged_info) {
+               acpi_os_printf("No GED handling present\n");
+               return;
+       }
+
+       gsiv_number = strtoul(gsiv_arg, NULL, 0);
+
+       while (ged_info) {
+
+               if (ged_info->int_id == gsiv_number) {
+                       struct acpi_object_list arg_list;
+                       union acpi_object arg0;
+                       acpi_handle evt_handle = ged_info->evt_method;
+                       acpi_status status;
+
+                       acpi_os_printf("Evaluate GED _EVT (GSIV=%u)\n",
+                                      gsiv_number);
+
+                       if (!evt_handle) {
+                               acpi_os_printf("Undefined _EVT method\n");
+                               return;
+                       }
+
+                       arg0.integer.type = ACPI_TYPE_INTEGER;
+                       arg0.integer.value = gsiv_number;
+
+                       arg_list.count = 1;
+                       arg_list.pointer = &arg0;
+
+                       status =
+                           acpi_evaluate_object(evt_handle, NULL, &arg_list,
+                                                NULL);
+                       if (ACPI_FAILURE(status)) {
+                               acpi_os_printf("Could not evaluate _EVT\n");
+                               return;
+                       }
+               }
+               ged_info = ged_info->next;
+       }
+}
+
 #if (!ACPI_REDUCED_HARDWARE)
 /*******************************************************************************
  *
index b8a4892..861b12c 100644 (file)
@@ -106,6 +106,7 @@ enum acpi_ex_debugger_commands {
        CMD_THREADS,
 
        CMD_TEST,
+       CMD_INTERRUPT,
 #endif
 };
 
@@ -185,6 +186,7 @@ static const struct acpi_db_command_info acpi_gbl_db_commands[] = {
        {"THREADS", 3},
 
        {"TEST", 1},
+       {"INTERRUPT", 1},
 #endif
        {NULL, 0}
 };
@@ -318,6 +320,7 @@ static const struct acpi_db_command_help acpi_gbl_db_command_help[] = {
        {1, "  Gpes", "Display info on all GPE devices\n"},
        {1, "  Sci", "Generate an SCI\n"},
        {1, "  Sleep [SleepState]", "Simulate sleep/wake sequence(s) (0-5)\n"},
+       {1, "  Interrupt <GSIV>", "Simulate an interrupt\n"},
 #endif
        {0, NULL, NULL}
 };
@@ -1064,6 +1067,11 @@ acpi_db_command_dispatch(char *input_buffer,
                acpi_os_printf("Event command not implemented\n");
                break;
 
+       case CMD_INTERRUPT:
+
+               acpi_db_generate_interrupt(acpi_gbl_db_args[1]);
+               break;
+
        case CMD_GPE:
 
                acpi_db_generate_gpe(acpi_gbl_db_args[1], acpi_gbl_db_args[2]);
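For reference, a hedged example of what the new command might look like at the
ACPICA debugger prompt (the GSIV value is made up and the prompt rendering may
differ by build; the output line matches the printf added above):

        - Interrupt 23
        Evaluate GED _EVT (GSIV=23)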
index d3841de..75338a1 100644 (file)
@@ -146,8 +146,8 @@ acpi_ds_result_push(union acpi_operand_object *object,
 
        if (!object) {
                ACPI_ERROR((AE_INFO,
-                           "Null Object! Obj=%p State=%p Num=%u",
-                           object, walk_state, walk_state->result_count));
+                           "Null Object! State=%p Num=%u",
+                           walk_state, walk_state->result_count));
                return (AE_BAD_PARAMETER);
        }
 
index 5d99b1a..5241f4c 100644 (file)
@@ -343,8 +343,7 @@ acpi_ex_write_serial_bus(union acpi_operand_object *source_desc,
        /* Copy the input buffer data to the transfer buffer */
 
        buffer = buffer_desc->buffer.pointer;
-       data_length = (buffer_length < source_desc->buffer.length ?
-                      buffer_length : source_desc->buffer.length);
+       data_length = ACPI_MIN(buffer_length, source_desc->buffer.length);
        memcpy(buffer, source_desc->buffer.pointer, data_length);
 
        /* Lock entire transaction if requested */
index 09029fe..39e3103 100644 (file)
@@ -603,7 +603,7 @@ const struct acpi_opcode_info acpi_gbl_aml_op_info[AML_NUM_OPCODES] = {
 
 /* 7E */ ACPI_OP("Timer", ARGP_TIMER_OP, ARGI_TIMER_OP, ACPI_TYPE_ANY,
                         AML_CLASS_EXECUTE, AML_TYPE_EXEC_0A_0T_1R,
-                        AML_FLAGS_EXEC_0A_0T_1R),
+                        AML_FLAGS_EXEC_0A_0T_1R | AML_NO_OPERAND_RESOLVE),
 
 /* ACPI 5.0 opcodes */
 
index 1bbba85..c5f6c85 100644 (file)
@@ -37,7 +37,12 @@ void acpi_ut_init_stack_ptr_trace(void)
 {
        acpi_size current_sp;
 
+#pragma GCC diagnostic push
+#if defined(__GNUC__) && __GNUC__ >= 12
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
        acpi_gbl_entry_stack_pointer = &current_sp;
+#pragma GCC diagnostic pop
 }
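The push/ignore/pop shape above generalizes; a standalone sketch of the same
pattern (illustrative, not kernel code) that compiles warning-free on GCC 12+:

        void record_stack_base(void **out)
        {
                int marker;
        #pragma GCC diagnostic push
        #if defined(__GNUC__) && __GNUC__ >= 12
        #pragma GCC diagnostic ignored "-Wdangling-pointer="
        #endif
                *out = &marker; /* intentionally escapes; callers only compare addresses */
        #pragma GCC diagnostic pop
        }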
 
 /*******************************************************************************
index f81fe24..143debc 100644 (file)
@@ -3,4 +3,5 @@ obj-$(CONFIG_ACPI_AGDI)         += agdi.o
 obj-$(CONFIG_ACPI_IORT)        += iort.o
 obj-$(CONFIG_ACPI_GTDT)        += gtdt.o
 obj-$(CONFIG_ACPI_APMT)        += apmt.o
+obj-$(CONFIG_ARM_AMBA)         += amba.o
 obj-y                          += dma.o init.o
similarity index 99%
rename from drivers/acpi/acpi_amba.c
rename to drivers/acpi/arm64/amba.c
index f5b443a..b2a7631 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-#include "internal.h"
+#include "init.h"
 
 static const struct acpi_device_id amba_id_list[] = {
        {"ARMH0061", 0}, /* PL061 GPIO Device */
index d3ce53d..d0c8aed 100644 (file)
@@ -10,4 +10,6 @@ void __init acpi_arm_init(void)
                acpi_apmt_init();
        if (IS_ENABLED(CONFIG_ACPI_IORT))
                acpi_iort_init();
+       if (IS_ENABLED(CONFIG_ARM_AMBA))
+               acpi_amba_init();
 }
index a1715a2..dcc2779 100644 (file)
@@ -4,3 +4,4 @@
 void __init acpi_agdi_init(void);
 void __init acpi_apmt_init(void);
 void __init acpi_iort_init(void);
+void __init acpi_amba_init(void);
index 3631230..6496ff5 100644 (file)
@@ -1007,9 +1007,6 @@ static void iort_node_get_rmr_info(struct acpi_iort_node *node,
        for (i = 0; i < node->mapping_count; i++, map++) {
                struct acpi_iort_node *parent;
 
-               if (!map->id_count)
-                       continue;
-
                parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
                                      map->output_reference);
                if (parent != iommu)
@@ -1711,7 +1708,10 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res,
 static struct acpi_platform_list pmcg_plat_info[] __initdata = {
        /* HiSilicon Hip08 Platform */
        {"HISI  ", "HIP08   ", 0, ACPI_SIG_IORT, greater_than_or_equal,
-        "Erratum #162001800", IORT_SMMU_V3_PMCG_HISI_HIP08},
+        "Erratum #162001800, Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP08},
+       /* HiSilicon Hip09 Platform */
+       {"HISI  ", "HIP09   ", 0, ACPI_SIG_IORT, greater_than_or_equal,
+        "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09},
        { }
 };
 
index 9c67ed0..969bf81 100644 (file)
@@ -1034,8 +1034,9 @@ static void acpi_battery_refresh(struct acpi_battery *battery)
 }
 
 /* Driver Interface */
-static void acpi_battery_notify(struct acpi_device *device, u32 event)
+static void acpi_battery_notify(acpi_handle handle, u32 event, void *data)
 {
+       struct acpi_device *device = data;
        struct acpi_battery *battery = acpi_driver_data(device);
        struct power_supply *old;
 
@@ -1212,13 +1213,22 @@ static int acpi_battery_add(struct acpi_device *device)
 
        device_init_wakeup(&device->dev, 1);
 
-       return result;
+       result = acpi_dev_install_notify_handler(device, ACPI_ALL_NOTIFY,
+                                                acpi_battery_notify);
+       if (result)
+               goto fail_pm;
+
+       return 0;
 
+fail_pm:
+       device_init_wakeup(&device->dev, 0);
+       unregister_pm_notifier(&battery->pm_nb);
 fail:
        sysfs_remove_battery(battery);
        mutex_destroy(&battery->lock);
        mutex_destroy(&battery->sysfs_lock);
        kfree(battery);
+
        return result;
 }
 
@@ -1228,10 +1238,16 @@ static void acpi_battery_remove(struct acpi_device *device)
 
        if (!device || !acpi_driver_data(device))
                return;
-       device_init_wakeup(&device->dev, 0);
+
        battery = acpi_driver_data(device);
+
+       acpi_dev_remove_notify_handler(device, ACPI_ALL_NOTIFY,
+                                      acpi_battery_notify);
+
+       device_init_wakeup(&device->dev, 0);
        unregister_pm_notifier(&battery->pm_nb);
        sysfs_remove_battery(battery);
+
        mutex_destroy(&battery->lock);
        mutex_destroy(&battery->sysfs_lock);
        kfree(battery);
@@ -1264,11 +1280,9 @@ static struct acpi_driver acpi_battery_driver = {
        .name = "battery",
        .class = ACPI_BATTERY_CLASS,
        .ids = battery_device_ids,
-       .flags = ACPI_DRIVER_ALL_NOTIFY_EVENTS,
        .ops = {
                .add = acpi_battery_add,
                .remove = acpi_battery_remove,
-               .notify = acpi_battery_notify,
                },
        .drv.pm = &acpi_battery_pm,
 };
index 2fc2b43..f41dda2 100644 (file)
@@ -554,6 +554,30 @@ static void acpi_device_remove_notify_handler(struct acpi_device *device,
        acpi_os_wait_events_complete();
 }
 
+int acpi_dev_install_notify_handler(struct acpi_device *adev,
+                                   u32 handler_type,
+                                   acpi_notify_handler handler)
+{
+       acpi_status status;
+
+       status = acpi_install_notify_handler(adev->handle, handler_type,
+                                            handler, adev);
+       if (ACPI_FAILURE(status))
+               return -ENODEV;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(acpi_dev_install_notify_handler);
+
+void acpi_dev_remove_notify_handler(struct acpi_device *adev,
+                                   u32 handler_type,
+                                   acpi_notify_handler handler)
+{
+       acpi_remove_notify_handler(adev->handle, handler_type, handler);
+       acpi_os_wait_events_complete();
+}
+EXPORT_SYMBOL_GPL(acpi_dev_remove_notify_handler);
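These two helpers carry the conversion pattern used throughout this series
(battery, hed, nfit, thermal, video): drivers drop the .notify callback and
instead install a handler from .add and remove it from .remove. A minimal
sketch of a converted driver follows; the foo_* names are placeholders, and
the usual acpi_driver boilerplate is assumed:

        static void foo_notify(acpi_handle handle, u32 event, void *data)
        {
                struct acpi_device *adev = data;
                /* react to the event using driver data attached to adev */
        }

        static int foo_add(struct acpi_device *adev)
        {
                /* ... driver setup ... */
                return acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
                                                       foo_notify);
        }

        static void foo_remove(struct acpi_device *adev)
        {
                acpi_dev_remove_notify_handler(adev, ACPI_DEVICE_NOTIFY,
                                               foo_notify);
                /* ... driver teardown ... */
        }

Note that the converted drivers in this series install the handler as the last
step of .add (with unwinding on failure), so the handler can never observe a
half-initialized device.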
+
 /* Handle events targeting \_SB device (at present only graceful shutdown) */
 
 #define ACPI_SB_NOTIFY_SHUTDOWN_REQUEST 0x81
@@ -1005,8 +1029,10 @@ static int acpi_device_probe(struct device *dev)
                return -ENOSYS;
 
        ret = acpi_drv->ops.add(acpi_dev);
-       if (ret)
+       if (ret) {
+               acpi_dev->driver_data = NULL;
                return ret;
+       }
 
        pr_debug("Driver [%s] successfully bound to device [%s]\n",
                 acpi_drv->name, acpi_dev->pnp.bus_id);
@@ -1296,9 +1322,6 @@ static int __init acpi_bus_init(void)
                goto error1;
        }
 
-       /* Set capability bits for _OSC under processor scope */
-       acpi_early_processor_osc();
-
        /*
         * _OSC method may exist in module level code,
         * so it must be run after ACPI_FULL_INITIALIZATION
@@ -1314,7 +1337,7 @@ static int __init acpi_bus_init(void)
 
        acpi_sysfs_init();
 
-       acpi_early_processor_set_pdc();
+       acpi_early_processor_control_setup();
 
        /*
         * Maybe EC region is required at bus_scan/acpi_get_devices. So it
index 78d44e3..46c6f8c 100644 (file)
@@ -42,22 +42,32 @@ EXPORT_SYMBOL_GPL(unregister_acpi_hed_notifier);
  * it is used by HEST Generic Hardware Error Source with notify type
  * SCI.
  */
-static void acpi_hed_notify(struct acpi_device *device, u32 event)
+static void acpi_hed_notify(acpi_handle handle, u32 event, void *data)
 {
        blocking_notifier_call_chain(&acpi_hed_notify_list, 0, NULL);
 }
 
 static int acpi_hed_add(struct acpi_device *device)
 {
+       int err;
+
        /* Only one hardware error device */
        if (hed_handle)
                return -EINVAL;
        hed_handle = device->handle;
-       return 0;
+
+       err = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY,
+                                             acpi_hed_notify);
+       if (err)
+               hed_handle = NULL;
+
+       return err;
 }
 
 static void acpi_hed_remove(struct acpi_device *device)
 {
+       acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY,
+                                      acpi_hed_notify);
        hed_handle = NULL;
 }
 
@@ -68,7 +78,6 @@ static struct acpi_driver acpi_hed_driver = {
        .ops = {
                .add = acpi_hed_add,
                .remove = acpi_hed_remove,
-               .notify = acpi_hed_notify,
        },
 };
 module_acpi_driver(acpi_hed_driver);
index f4148dc..866c7c4 100644 (file)
@@ -28,11 +28,6 @@ void acpi_processor_init(void);
 void acpi_platform_init(void);
 void acpi_pnp_init(void);
 void acpi_int340x_thermal_init(void);
-#ifdef CONFIG_ARM_AMBA
-void acpi_amba_init(void);
-#else
-static inline void acpi_amba_init(void) {}
-#endif
 int acpi_sysfs_init(void);
 void acpi_gpe_apply_masked_gpes(void);
 void acpi_container_init(void);
@@ -128,7 +123,6 @@ int __acpi_device_uevent_modalias(const struct acpi_device *adev,
 /* --------------------------------------------------------------------------
                                   Power Resource
    -------------------------------------------------------------------------- */
-int acpi_power_init(void);
 void acpi_power_resources_list_free(struct list_head *list);
 int acpi_extract_power_resources(union acpi_object *package, unsigned int start,
                                 struct list_head *list);
@@ -152,15 +146,13 @@ int acpi_wakeup_device_init(void);
                                   Processor
    -------------------------------------------------------------------------- */
 #ifdef CONFIG_ARCH_MIGHT_HAVE_ACPI_PDC
+void acpi_early_processor_control_setup(void);
 void acpi_early_processor_set_pdc(void);
-#else
-static inline void acpi_early_processor_set_pdc(void) {}
-#endif
 
-#ifdef CONFIG_X86
-void acpi_early_processor_osc(void);
+void acpi_proc_quirk_mwait_check(void);
+bool processor_physically_present(acpi_handle handle);
 #else
-static inline void acpi_early_processor_osc(void) {}
+static inline void acpi_early_processor_control_setup(void) {}
 #endif
 
 /* --------------------------------------------------------------------------
index 07204d4..f0e6738 100644 (file)
@@ -3282,6 +3282,23 @@ static void acpi_nfit_put_table(void *table)
        acpi_put_table(table);
 }
 
+static void acpi_nfit_notify(acpi_handle handle, u32 event, void *data)
+{
+       struct acpi_device *adev = data;
+
+       device_lock(&adev->dev);
+       __acpi_nfit_notify(&adev->dev, handle, event);
+       device_unlock(&adev->dev);
+}
+
+static void acpi_nfit_remove_notify_handler(void *data)
+{
+       struct acpi_device *adev = data;
+
+       acpi_dev_remove_notify_handler(adev, ACPI_DEVICE_NOTIFY,
+                                      acpi_nfit_notify);
+}
+
 void acpi_nfit_shutdown(void *data)
 {
        struct acpi_nfit_desc *acpi_desc = data;
@@ -3368,12 +3385,18 @@ static int acpi_nfit_add(struct acpi_device *adev)
 
        if (rc)
                return rc;
-       return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
-}
 
-static void acpi_nfit_remove(struct acpi_device *adev)
-{
-       /* see acpi_nfit_unregister */
+       rc = devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
+       if (rc)
+               return rc;
+
+       rc = acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
+                                            acpi_nfit_notify);
+       if (rc)
+               return rc;
+
+       return devm_add_action_or_reset(dev, acpi_nfit_remove_notify_handler,
+                                       adev);
 }
 
 static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
@@ -3446,13 +3469,6 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 }
 EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
 
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
-{
-       device_lock(&adev->dev);
-       __acpi_nfit_notify(&adev->dev, adev->handle, event);
-       device_unlock(&adev->dev);
-}
-
 static const struct acpi_device_id acpi_nfit_ids[] = {
        { "ACPI0012", 0 },
        { "", 0 },
@@ -3464,8 +3480,6 @@ static struct acpi_driver acpi_nfit_driver = {
        .ids = acpi_nfit_ids,
        .ops = {
                .add = acpi_nfit_add,
-               .remove = acpi_nfit_remove,
-               .notify = acpi_nfit_notify,
        },
 };
 
index 3d4c462..7020584 100644 (file)
@@ -53,7 +53,7 @@ static LIST_HEAD(prm_module_list);
 
 struct prm_handler_info {
        guid_t guid;
-       void *handler_addr;
+       efi_status_t (__efiapi *handler_addr)(u64, void *);
        u64 static_data_buffer_addr;
        u64 acpi_param_buffer_addr;
 
@@ -260,9 +260,9 @@ static acpi_status acpi_platformrt_space_handler(u32 function,
                context.static_data_buffer = handler->static_data_buffer_addr;
                context.mmio_ranges = module->mmio_info;
 
-               status = efi_call_virt_pointer(handler, handler_addr,
-                                              handler->acpi_param_buffer_addr,
-                                              &context);
+               status = efi_call_acpi_prm_handler(handler->handler_addr,
+                                                  handler->acpi_param_buffer_addr,
+                                                  &context);
                if (status == EFI_SUCCESS) {
                        buffer->prm_status = PRM_HANDLER_SUCCESS;
                } else {
index d6606a9..7dd6dba 100644 (file)
@@ -132,6 +132,30 @@ static int map_rintc_hartid(struct acpi_subtable_header *entry,
        return -EINVAL;
 }
 
+/*
+ * Retrieve LoongArch CPU physical id
+ */
+static int map_core_pic_id(struct acpi_subtable_header *entry,
+               int device_declaration, u32 acpi_id, phys_cpuid_t *phys_id)
+{
+       struct acpi_madt_core_pic *core_pic =
+               container_of(entry, struct acpi_madt_core_pic, header);
+
+       if (!(core_pic->flags & ACPI_MADT_ENABLED))
+               return -ENODEV;
+
+       /*
+        * device_declaration means a Device object in the DSDT. On LoongArch
+        * systems, the logical processor acpi_id is required in the _UID
+        * property of the DSDT, so check device_declaration here.
+        */
+       if (device_declaration && (core_pic->processor_id == acpi_id)) {
+               *phys_id = core_pic->core_id;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
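A self-contained mock of the lookup this helper performs; the struct below is
a simplified stand-in for struct acpi_madt_core_pic and all values are made up:

        #include <stdint.h>
        #include <stdio.h>

        #define MADT_ENABLED 1

        struct core_pic {
                uint32_t processor_id;  /* matched against the _UID-derived acpi_id */
                uint32_t core_id;       /* physical id reported back */
                uint32_t flags;
        };

        static int map_core_pic_id(const struct core_pic *p, uint32_t acpi_id,
                                   uint64_t *phys_id)
        {
                if (!(p->flags & MADT_ENABLED))
                        return -1;
                if (p->processor_id != acpi_id)
                        return -1;
                *phys_id = p->core_id;
                return 0;
        }

        int main(void)
        {
                struct core_pic p = { .processor_id = 3, .core_id = 7,
                                      .flags = MADT_ENABLED };
                uint64_t phys;

                if (!map_core_pic_id(&p, 3, &phys))
                        printf("acpi_id 3 -> core %llu\n",
                               (unsigned long long)phys);
                return 0;
        }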
 static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
                                   int type, u32 acpi_id)
 {
@@ -165,6 +189,9 @@ static phys_cpuid_t map_madt_entry(struct acpi_table_madt *madt,
                } else if (header->type == ACPI_MADT_TYPE_RINTC) {
                        if (!map_rintc_hartid(header, type, acpi_id, &phys_id))
                                break;
+               } else if (header->type == ACPI_MADT_TYPE_CORE_PIC) {
+                       if (!map_core_pic_id(header, type, acpi_id, &phys_id))
+                               break;
                }
                entry += header->length;
        }
@@ -216,6 +243,8 @@ static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id)
                map_x2apic_id(header, type, acpi_id, &phys_id);
        else if (header->type == ACPI_MADT_TYPE_GENERIC_INTERRUPT)
                map_gicc_mpidr(header, type, acpi_id, &phys_id);
+       else if (header->type == ACPI_MADT_TYPE_CORE_PIC)
+               map_core_pic_id(header, type, acpi_id, &phys_id);
 
 exit:
        kfree(buffer.pointer);
index 18fb045..1a8591e 100644 (file)
@@ -9,71 +9,19 @@
 
 #define pr_fmt(fmt) "ACPI: " fmt
 
-#include <linux/dmi.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <acpi/processor.h>
 
-#include <xen/xen.h>
-
 #include "internal.h"
 
-static bool __init processor_physically_present(acpi_handle handle)
-{
-       int cpuid, type;
-       u32 acpi_id;
-       acpi_status status;
-       acpi_object_type acpi_type;
-       unsigned long long tmp;
-       union acpi_object object = { 0 };
-       struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-
-       status = acpi_get_type(handle, &acpi_type);
-       if (ACPI_FAILURE(status))
-               return false;
-
-       switch (acpi_type) {
-       case ACPI_TYPE_PROCESSOR:
-               status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-               if (ACPI_FAILURE(status))
-                       return false;
-               acpi_id = object.processor.proc_id;
-               break;
-       case ACPI_TYPE_DEVICE:
-               status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-               if (ACPI_FAILURE(status))
-                       return false;
-               acpi_id = tmp;
-               break;
-       default:
-               return false;
-       }
-
-       if (xen_initial_domain())
-               /*
-                * When running as a Xen dom0 the number of processors Linux
-                * sees can be different from the real number of processors on
-                * the system, and we still need to execute _PDC for all of
-                * them.
-                */
-               return xen_processor_present(acpi_id);
-
-       type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0;
-       cpuid = acpi_get_cpuid(handle, type, acpi_id);
-
-       return !invalid_logical_cpuid(cpuid);
-}
-
 static void acpi_set_pdc_bits(u32 *buf)
 {
        buf[0] = ACPI_PDC_REVISION_ID;
        buf[1] = 1;
 
-       /* Enable coordination with firmware's _TSD info */
-       buf[2] = ACPI_PDC_SMP_T_SWCOORD;
-
        /* Twiddle arch-specific bits needed for _PDC */
-       arch_acpi_set_pdc_bits(buf);
+       arch_acpi_set_proc_cap_bits(&buf[2]);
 }
 
 static struct acpi_object_list *acpi_processor_alloc_pdc(void)
@@ -123,20 +71,6 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in)
 {
        acpi_status status = AE_OK;
 
-       if (boot_option_idle_override == IDLE_NOMWAIT) {
-               /*
-                * If mwait is disabled for CPU C-states, the C2C3_FFH access
-                * mode will be disabled in the parameter of _PDC object.
-                * Of course C1_FFH access mode will also be disabled.
-                */
-               union acpi_object *obj;
-               u32 *buffer = NULL;
-
-               obj = pdc_in->pointer;
-               buffer = (u32 *)(obj->buffer.pointer);
-               buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH);
-
-       }
        status = acpi_evaluate_object(handle, "_PDC", pdc_in, NULL);
 
        if (ACPI_FAILURE(status))
@@ -174,36 +108,9 @@ early_init_pdc(acpi_handle handle, u32 lvl, void *context, void **rv)
        return AE_OK;
 }
 
-static int __init set_no_mwait(const struct dmi_system_id *id)
-{
-       pr_notice("%s detected - disabling mwait for CPU C-states\n",
-                 id->ident);
-       boot_option_idle_override = IDLE_NOMWAIT;
-       return 0;
-}
-
-static const struct dmi_system_id processor_idle_dmi_table[] __initconst = {
-       {
-       set_no_mwait, "Extensa 5220", {
-       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
-       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
-       DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
-       DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL},
-       {},
-};
-
-static void __init processor_dmi_check(void)
-{
-       /*
-        * Check whether the system is DMI table. If yes, OSPM
-        * should not use mwait for CPU-states.
-        */
-       dmi_check_system(processor_idle_dmi_table);
-}
-
 void __init acpi_early_processor_set_pdc(void)
 {
-       processor_dmi_check();
+       acpi_proc_quirk_mwait_check();
 
        acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
                            ACPI_UINT32_MAX,
index 1dd8d5a..32cfa3f 100644 (file)
@@ -470,6 +470,49 @@ static const struct dmi_system_id asus_laptop[] = {
        { }
 };
 
+static const struct dmi_system_id tongfang_gm_rg[] = {
+       {
+               .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id maingear_laptop[] = {
+       {
+               .ident = "MAINGEAR Vector Pro 2 15",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"),
+               }
+       },
+       {
+               .ident = "MAINGEAR Vector Pro 2 17",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id pcspecialist_laptop[] = {
+       {
+               .ident = "PCSpecialist Elimina Pro 16 M",
+               /*
+                * Some models have product-name "Elimina Pro 16 M",
+                * others "GM6BGEQ". Match on board-name to match both.
+                */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"),
+                       DMI_MATCH(DMI_BOARD_NAME, "GM6BGEQ"),
+               },
+       },
+       { }
+};
+
 static const struct dmi_system_id lg_laptop[] = {
        {
                .ident = "LG Electronics 17U70P",
@@ -493,6 +536,9 @@ struct irq_override_cmp {
 static const struct irq_override_cmp override_table[] = {
        { medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
        { asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
+       { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+       { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+       { pcspecialist_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
        { lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
 };
 
@@ -512,6 +558,28 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
                        return entry->override;
        }
 
+#ifdef CONFIG_X86
+       /*
+        * Always use the MADT override info, except for the i8042 PS/2 ctrl
+        * IRQs (1 and 12). For these, the DSDT IRQ settings should sometimes
+        * be used; otherwise PS/2 keyboards / mice will not work.
+        */
+       if (gsi != 1 && gsi != 12)
+               return true;
+
+       /* If the override comes from an INT_SRC_OVR MADT entry, honor it. */
+       if (acpi_int_src_ovr[gsi])
+               return true;
+
+       /*
+        * IRQ override isn't needed on modern AMD Zen systems and
+        * this override breaks active low IRQs on AMD Ryzen 6000 and
+        * newer systems. Skip it.
+        */
+       if (boot_cpu_has(X86_FEATURE_ZEN))
+               return false;
+#endif
+
        return true;
 }
 
index 5b145f1..531a9e3 100644 (file)
@@ -795,6 +795,9 @@ static const char * const acpi_ignore_dep_ids[] = {
 /* List of HIDs for which we honor deps of matching ACPI devs, when checking _DEP lists. */
 static const char * const acpi_honor_dep_ids[] = {
        "INT3472", /* Camera sensor PMIC / clk and regulator info */
+       "INTC1059", /* IVSC (TGL) driver must be loaded to allow i2c access to camera sensors */
+       "INTC1095", /* IVSC (ADL) driver must be loaded to allow i2c access to camera sensors */
+       "INTC100A", /* IVSC (RPL) driver must be loaded to allow i2c access to camera sensors */
        NULL
 };
 
@@ -1714,6 +1717,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
                {"BSG1160", },
                {"BSG2150", },
                {"CSC3551", },
+               {"CSC3556", },
                {"INT33FE", },
                {"INT3515", },
                /* Non-conforming _HID for Cirrus Logic already released */
@@ -2615,7 +2619,6 @@ void __init acpi_scan_init(void)
        acpi_watchdog_init();
        acpi_pnp_init();
        acpi_int340x_thermal_init();
-       acpi_amba_init();
        acpi_init_lpit();
 
        acpi_scan_add_handler(&generic_device_handler);
index f9f6ebb..419590f 100644 (file)
@@ -82,10 +82,6 @@ static int tzp;
 module_param(tzp, int, 0444);
 MODULE_PARM_DESC(tzp, "Thermal zone polling frequency, in 1/10 seconds.");
 
-static int nocrt;
-module_param(nocrt, int, 0);
-MODULE_PARM_DESC(nocrt, "Set to take no action upon ACPI thermal zone critical trips points.");
-
 static int off;
 module_param(off, int, 0);
 MODULE_PARM_DESC(off, "Set to disable ACPI thermal support.");
@@ -96,35 +92,27 @@ MODULE_PARM_DESC(psv, "Disable or override all passive trip points.");
 
 static struct workqueue_struct *acpi_thermal_pm_queue;
 
-struct acpi_thermal_critical {
-       unsigned long temperature;
-       bool valid;
-};
-
-struct acpi_thermal_hot {
+struct acpi_thermal_trip {
        unsigned long temperature;
        bool valid;
 };
 
 struct acpi_thermal_passive {
+       struct acpi_thermal_trip trip;
        struct acpi_handle_list devices;
-       unsigned long temperature;
        unsigned long tc1;
        unsigned long tc2;
        unsigned long tsp;
-       bool valid;
 };
 
 struct acpi_thermal_active {
+       struct acpi_thermal_trip trip;
        struct acpi_handle_list devices;
-       unsigned long temperature;
-       bool valid;
-       bool enabled;
 };
 
 struct acpi_thermal_trips {
-       struct acpi_thermal_critical critical;
-       struct acpi_thermal_hot hot;
+       struct acpi_thermal_trip critical;
+       struct acpi_thermal_trip hot;
        struct acpi_thermal_passive passive;
        struct acpi_thermal_active active[ACPI_THERMAL_MAX_ACTIVE];
 };
@@ -137,6 +125,7 @@ struct acpi_thermal {
        unsigned long polling_frequency;
        volatile u8 zombie;
        struct acpi_thermal_trips trips;
+       struct thermal_trip *trip_table;
        struct acpi_handle_list devices;
        struct thermal_zone_device *thermal_zone;
        int kelvin_offset;      /* in millidegrees */
@@ -190,7 +179,16 @@ static int acpi_thermal_get_polling_frequency(struct acpi_thermal *tz)
        return 0;
 }
 
-static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
+static int acpi_thermal_temp(struct acpi_thermal *tz, int temp_deci_k)
+{
+       if (temp_deci_k == THERMAL_TEMP_INVALID)
+               return THERMAL_TEMP_INVALID;
+
+       return deci_kelvin_to_millicelsius_with_offset(temp_deci_k,
+                                                      tz->kelvin_offset);
+}
+
+static void __acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
 {
        acpi_status status;
        unsigned long long tmp;
@@ -255,9 +253,9 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
        }
 
        /* Passive (optional) */
-       if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.valid) ||
+       if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.trip.valid) ||
            flag == ACPI_TRIPS_INIT) {
-               valid = tz->trips.passive.valid;
+               valid = tz->trips.passive.trip.valid;
                if (psv == -1) {
                        status = AE_SUPPORT;
                } else if (psv > 0) {
@@ -269,44 +267,44 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
                }
 
                if (ACPI_FAILURE(status)) {
-                       tz->trips.passive.valid = false;
+                       tz->trips.passive.trip.valid = false;
                } else {
-                       tz->trips.passive.temperature = tmp;
-                       tz->trips.passive.valid = true;
+                       tz->trips.passive.trip.temperature = tmp;
+                       tz->trips.passive.trip.valid = true;
                        if (flag == ACPI_TRIPS_INIT) {
                                status = acpi_evaluate_integer(tz->device->handle,
                                                               "_TC1", NULL, &tmp);
                                if (ACPI_FAILURE(status))
-                                       tz->trips.passive.valid = false;
+                                       tz->trips.passive.trip.valid = false;
                                else
                                        tz->trips.passive.tc1 = tmp;
 
                                status = acpi_evaluate_integer(tz->device->handle,
                                                               "_TC2", NULL, &tmp);
                                if (ACPI_FAILURE(status))
-                                       tz->trips.passive.valid = false;
+                                       tz->trips.passive.trip.valid = false;
                                else
                                        tz->trips.passive.tc2 = tmp;
 
                                status = acpi_evaluate_integer(tz->device->handle,
                                                               "_TSP", NULL, &tmp);
                                if (ACPI_FAILURE(status))
-                                       tz->trips.passive.valid = false;
+                                       tz->trips.passive.trip.valid = false;
                                else
                                        tz->trips.passive.tsp = tmp;
                        }
                }
        }
-       if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.valid) {
+       if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.passive.trip.valid) {
                memset(&devices, 0, sizeof(struct acpi_handle_list));
                status = acpi_evaluate_reference(tz->device->handle, "_PSL",
                                                 NULL, &devices);
                if (ACPI_FAILURE(status)) {
                        acpi_handle_info(tz->device->handle,
                                         "Invalid passive threshold\n");
-                       tz->trips.passive.valid = false;
+                       tz->trips.passive.trip.valid = false;
                } else {
-                       tz->trips.passive.valid = true;
+                       tz->trips.passive.trip.valid = true;
                }
 
                if (memcmp(&tz->trips.passive.devices, &devices,
@@ -317,24 +315,24 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
                }
        }
        if ((flag & ACPI_TRIPS_PASSIVE) || (flag & ACPI_TRIPS_DEVICES)) {
-               if (valid != tz->trips.passive.valid)
+               if (valid != tz->trips.passive.trip.valid)
                        ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state");
        }
 
        /* Active (optional) */
        for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
                char name[5] = { '_', 'A', 'C', ('0' + i), '\0' };
-               valid = tz->trips.active[i].valid;
+               valid = tz->trips.active[i].trip.valid;
 
                if (act == -1)
                        break; /* disable all active trip points */
 
                if (flag == ACPI_TRIPS_INIT || ((flag & ACPI_TRIPS_ACTIVE) &&
-                   tz->trips.active[i].valid)) {
+                   tz->trips.active[i].trip.valid)) {
                        status = acpi_evaluate_integer(tz->device->handle,
                                                       name, NULL, &tmp);
                        if (ACPI_FAILURE(status)) {
-                               tz->trips.active[i].valid = false;
+                               tz->trips.active[i].trip.valid = false;
                                if (i == 0)
                                        break;
 
@@ -342,35 +340,36 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
                                        break;
 
                                if (i == 1)
-                                       tz->trips.active[0].temperature = celsius_to_deci_kelvin(act);
+                                       tz->trips.active[0].trip.temperature =
+                                                       celsius_to_deci_kelvin(act);
                                else
                                        /*
                                         * Don't allow override higher than
                                         * the next higher trip point
                                         */
-                                       tz->trips.active[i-1].temperature =
+                                       tz->trips.active[i-1].trip.temperature =
                                                min_t(unsigned long,
-                                                     tz->trips.active[i-2].temperature,
+                                                     tz->trips.active[i-2].trip.temperature,
                                                      celsius_to_deci_kelvin(act));
 
                                break;
                        } else {
-                               tz->trips.active[i].temperature = tmp;
-                               tz->trips.active[i].valid = true;
+                               tz->trips.active[i].trip.temperature = tmp;
+                               tz->trips.active[i].trip.valid = true;
                        }
                }
 
                name[2] = 'L';
-               if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].valid) {
+               if ((flag & ACPI_TRIPS_DEVICES) && tz->trips.active[i].trip.valid) {
                        memset(&devices, 0, sizeof(struct acpi_handle_list));
                        status = acpi_evaluate_reference(tz->device->handle,
                                                         name, NULL, &devices);
                        if (ACPI_FAILURE(status)) {
                                acpi_handle_info(tz->device->handle,
                                                 "Invalid active%d threshold\n", i);
-                               tz->trips.active[i].valid = false;
+                               tz->trips.active[i].trip.valid = false;
                        } else {
-                               tz->trips.active[i].valid = true;
+                               tz->trips.active[i].trip.valid = true;
                        }
 
                        if (memcmp(&tz->trips.active[i].devices, &devices,
@@ -381,10 +380,10 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
                        }
                }
                if ((flag & ACPI_TRIPS_ACTIVE) || (flag & ACPI_TRIPS_DEVICES))
-                       if (valid != tz->trips.active[i].valid)
+                       if (valid != tz->trips.active[i].trip.valid)
                                ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "state");
 
-               if (!tz->trips.active[i].valid)
+               if (!tz->trips.active[i].trip.valid)
                        break;
        }
 
@@ -398,24 +397,73 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag)
                        ACPI_THERMAL_TRIPS_EXCEPTION(flag, tz, "device");
                }
        }
+}
+
+static int acpi_thermal_adjust_trip(struct thermal_trip *trip, void *data)
+{
+       struct acpi_thermal_trip *acpi_trip = trip->priv;
+       struct acpi_thermal *tz = data;
+
+       if (!acpi_trip)
+               return 0;
+
+       if (acpi_trip->valid)
+               trip->temperature = acpi_thermal_temp(tz, acpi_trip->temperature);
+       else
+               trip->temperature = THERMAL_TEMP_INVALID;
 
        return 0;
 }
 
+static void acpi_thermal_adjust_thermal_zone(struct thermal_zone_device *thermal,
+                                            unsigned long data)
+{
+       struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
+       int flag = data == ACPI_THERMAL_NOTIFY_THRESHOLDS ?
+                               ACPI_TRIPS_THRESHOLDS : ACPI_TRIPS_DEVICES;
+
+       __acpi_thermal_trips_update(tz, flag);
+
+       for_each_thermal_trip(tz->thermal_zone, acpi_thermal_adjust_trip, tz);
+}
+
+static void acpi_queue_thermal_check(struct acpi_thermal *tz)
+{
+       if (!work_pending(&tz->thermal_check_work))
+               queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work);
+}
+
+static void acpi_thermal_trips_update(struct acpi_thermal *tz, u32 event)
+{
+       struct acpi_device *adev = tz->device;
+
+       /*
+        * Use thermal_zone_device_exec() to carry out the trip points
+        * update, so as to protect thermal_get_trend() from getting stale
+        * trip point temperatures and to prevent thermal_zone_device_update()
+        * invoked from acpi_thermal_check_fn() from producing inconsistent
+        * results.
+        */
+       thermal_zone_device_exec(tz->thermal_zone,
+                                acpi_thermal_adjust_thermal_zone, event);
+       acpi_queue_thermal_check(tz);
+       acpi_bus_generate_netlink_event(adev->pnp.device_class,
+                                       dev_name(&adev->dev), event, 0);
+}
+
 static int acpi_thermal_get_trip_points(struct acpi_thermal *tz)
 {
-       int i, ret = acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
        bool valid;
+       int i;
 
-       if (ret)
-               return ret;
+       __acpi_thermal_trips_update(tz, ACPI_TRIPS_INIT);
 
        valid = tz->trips.critical.valid |
                tz->trips.hot.valid |
-               tz->trips.passive.valid;
+               tz->trips.passive.trip.valid;
 
        for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++)
-               valid = valid || tz->trips.active[i].valid;
+               valid = valid || tz->trips.active[i].trip.valid;
 
        if (!valid) {
                pr_warn(FW_BUG "No valid trip found\n");
@@ -443,159 +491,55 @@ static int thermal_get_temp(struct thermal_zone_device *thermal, int *temp)
        return 0;
 }
 
-static int thermal_get_trip_type(struct thermal_zone_device *thermal,
-                                int trip, enum thermal_trip_type *type)
+static int thermal_get_trend(struct thermal_zone_device *thermal,
+                            int trip_index, enum thermal_trend *trend)
 {
        struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-       int i;
+       struct acpi_thermal_trip *acpi_trip;
+       int t, i;
 
-       if (!tz || trip < 0)
+       if (!tz || trip_index < 0)
                return -EINVAL;
 
-       if (tz->trips.critical.valid) {
-               if (!trip) {
-                       *type = THERMAL_TRIP_CRITICAL;
-                       return 0;
-               }
-               trip--;
-       }
-
-       if (tz->trips.hot.valid) {
-               if (!trip) {
-                       *type = THERMAL_TRIP_HOT;
-                       return 0;
-               }
-               trip--;
-       }
-
-       if (tz->trips.passive.valid) {
-               if (!trip) {
-                       *type = THERMAL_TRIP_PASSIVE;
-                       return 0;
-               }
-               trip--;
-       }
-
-       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid; i++) {
-               if (!trip) {
-                       *type = THERMAL_TRIP_ACTIVE;
-                       return 0;
-               }
-               trip--;
-       }
-
-       return -EINVAL;
-}
+       if (tz->trips.critical.valid)
+               trip_index--;
 
-static int thermal_get_trip_temp(struct thermal_zone_device *thermal,
-                                int trip, int *temp)
-{
-       struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-       int i;
+       if (tz->trips.hot.valid)
+               trip_index--;
 
-       if (!tz || trip < 0)
+       if (trip_index < 0)
                return -EINVAL;
 
-       if (tz->trips.critical.valid) {
-               if (!trip) {
-                       *temp = deci_kelvin_to_millicelsius_with_offset(
-                                       tz->trips.critical.temperature,
-                                       tz->kelvin_offset);
-                       return 0;
-               }
-               trip--;
-       }
-
-       if (tz->trips.hot.valid) {
-               if (!trip) {
-                       *temp = deci_kelvin_to_millicelsius_with_offset(
-                                       tz->trips.hot.temperature,
-                                       tz->kelvin_offset);
-                       return 0;
-               }
-               trip--;
-       }
-
-       if (tz->trips.passive.valid) {
-               if (!trip) {
-                       *temp = deci_kelvin_to_millicelsius_with_offset(
-                                       tz->trips.passive.temperature,
-                                       tz->kelvin_offset);
-                       return 0;
-               }
-               trip--;
-       }
-
-       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE &&
-               tz->trips.active[i].valid; i++) {
-               if (!trip) {
-                       *temp = deci_kelvin_to_millicelsius_with_offset(
-                                       tz->trips.active[i].temperature,
-                                       tz->kelvin_offset);
-                       return 0;
-               }
-               trip--;
-       }
-
-       return -EINVAL;
-}
-
-static int thermal_get_crit_temp(struct thermal_zone_device *thermal,
-                               int *temperature)
-{
-       struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
+       acpi_trip = &tz->trips.passive.trip;
+       if (acpi_trip->valid && !trip_index--) {
+               t = tz->trips.passive.tc1 * (tz->temperature -
+                                               tz->last_temperature) +
+                       tz->trips.passive.tc2 * (tz->temperature -
+                                               acpi_trip->temperature);
+               if (t > 0)
+                       *trend = THERMAL_TREND_RAISING;
+               else if (t < 0)
+                       *trend = THERMAL_TREND_DROPPING;
+               else
+                       *trend = THERMAL_TREND_STABLE;
 
-       if (tz->trips.critical.valid) {
-               *temperature = deci_kelvin_to_millicelsius_with_offset(
-                                       tz->trips.critical.temperature,
-                                       tz->kelvin_offset);
                return 0;
        }
 
-       return -EINVAL;
-}
-
-static int thermal_get_trend(struct thermal_zone_device *thermal,
-                            int trip, enum thermal_trend *trend)
-{
-       struct acpi_thermal *tz = thermal_zone_device_priv(thermal);
-       enum thermal_trip_type type;
-       int i;
-
-       if (thermal_get_trip_type(thermal, trip, &type))
-               return -EINVAL;
-
-       if (type == THERMAL_TRIP_ACTIVE) {
-               int trip_temp;
-               int temp = deci_kelvin_to_millicelsius_with_offset(
-                                       tz->temperature, tz->kelvin_offset);
-               if (thermal_get_trip_temp(thermal, trip, &trip_temp))
-                       return -EINVAL;
+       t = acpi_thermal_temp(tz, tz->temperature);
 
-               if (temp > trip_temp) {
-                       *trend = THERMAL_TREND_RAISING;
-                       return 0;
-               } else {
-                       /* Fall back on default trend */
-                       return -EINVAL;
+       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+               acpi_trip = &tz->trips.active[i].trip;
+               if (acpi_trip->valid && !trip_index--) {
+                       if (t > acpi_thermal_temp(tz, acpi_trip->temperature)) {
+                               *trend = THERMAL_TREND_RAISING;
+                               return 0;
+                       }
+                       break;
                }
        }
 
-       /*
-        * tz->temperature has already been updated by generic thermal layer,
-        * before this callback being invoked
-        */
-       i = tz->trips.passive.tc1 * (tz->temperature - tz->last_temperature) +
-           tz->trips.passive.tc2 * (tz->temperature - tz->trips.passive.temperature);
-
-       if (i > 0)
-               *trend = THERMAL_TREND_RAISING;
-       else if (i < 0)
-               *trend = THERMAL_TREND_DROPPING;
-       else
-               *trend = THERMAL_TREND_STABLE;
-
-       return 0;
+       return -EINVAL;
 }
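Plugging sample numbers into the passive-trend expression above makes the sign
convention clear; the values are made up, and since both terms use the same
units the raw deci-kelvin readings can be used directly:

        #include <stdio.h>

        int main(void)
        {
                long tc1 = 2, tc2 = 5;  /* _TC1 / _TC2 thermal constants */
                long temp = 3282;       /* current reading, deci-kelvin */
                long last_temp = 3272;  /* previous reading */
                long passive = 3232;    /* _PSV trip point */

                long t = tc1 * (temp - last_temp) + tc2 * (temp - passive);

                printf("t = %ld -> %s\n", t,
                       t > 0 ? "RAISING" : t < 0 ? "DROPPING" : "STABLE");
                return 0;       /* prints: t = 270 -> RAISING */
        }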
 
 static void acpi_thermal_zone_device_hot(struct thermal_zone_device *thermal)
@@ -637,7 +581,7 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
        if (tz->trips.hot.valid)
                trip++;
 
-       if (tz->trips.passive.valid) {
+       if (tz->trips.passive.trip.valid) {
                trip++;
                for (i = 0; i < tz->trips.passive.devices.count; i++) {
                        handle = tz->trips.passive.devices.handles[i];
@@ -662,7 +606,7 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal,
        }
 
        for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
-               if (!tz->trips.active[i].valid)
+               if (!tz->trips.active[i].trip.valid)
                        break;
 
                trip++;
@@ -709,9 +653,6 @@ static struct thermal_zone_device_ops acpi_thermal_zone_ops = {
        .bind = acpi_thermal_bind_cooling_device,
        .unbind = acpi_thermal_unbind_cooling_device,
        .get_temp = thermal_get_temp,
-       .get_trip_type = thermal_get_trip_type,
-       .get_trip_temp = thermal_get_trip_temp,
-       .get_crit_temp = thermal_get_crit_temp,
        .get_trend = thermal_get_trend,
        .hot = acpi_thermal_zone_device_hot,
        .critical = acpi_thermal_zone_device_critical,
@@ -745,63 +686,97 @@ static void acpi_thermal_zone_sysfs_remove(struct acpi_thermal *tz)
 
 static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz)
 {
-       int trips = 0;
+       struct acpi_thermal_trip *acpi_trip;
+       struct thermal_trip *trip;
+       int passive_delay = 0;
+       int trip_count = 0;
        int result;
-       acpi_status status;
        int i;
 
        if (tz->trips.critical.valid)
-               trips++;
+               trip_count++;
 
        if (tz->trips.hot.valid)
-               trips++;
+               trip_count++;
+
+       if (tz->trips.passive.trip.valid) {
+               trip_count++;
+               passive_delay = tz->trips.passive.tsp * 100;
+       }
 
-       if (tz->trips.passive.valid)
-               trips++;
+       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].trip.valid; i++)
+               trip_count++;
 
-       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE && tz->trips.active[i].valid;
-            i++, trips++);
+       trip = kcalloc(trip_count, sizeof(*trip), GFP_KERNEL);
+       if (!trip)
+               return -ENOMEM;
 
-       if (tz->trips.passive.valid)
-               tz->thermal_zone = thermal_zone_device_register("acpitz", trips, 0, tz,
-                                                               &acpi_thermal_zone_ops, NULL,
-                                                               tz->trips.passive.tsp * 100,
-                                                               tz->polling_frequency * 100);
-       else
-               tz->thermal_zone =
-                       thermal_zone_device_register("acpitz", trips, 0, tz,
-                                                    &acpi_thermal_zone_ops, NULL,
-                                                    0, tz->polling_frequency * 100);
+       tz->trip_table = trip;
 
-       if (IS_ERR(tz->thermal_zone))
-               return -ENODEV;
+       if (tz->trips.critical.valid) {
+               trip->type = THERMAL_TRIP_CRITICAL;
+               trip->temperature = acpi_thermal_temp(tz, tz->trips.critical.temperature);
+               trip++;
+       }
+
+       if (tz->trips.hot.valid) {
+               trip->type = THERMAL_TRIP_HOT;
+               trip->temperature = acpi_thermal_temp(tz, tz->trips.hot.temperature);
+               trip++;
+       }
+
+       acpi_trip = &tz->trips.passive.trip;
+       if (acpi_trip->valid) {
+               trip->type = THERMAL_TRIP_PASSIVE;
+               trip->temperature = acpi_thermal_temp(tz, acpi_trip->temperature);
+               trip->priv = acpi_trip;
+               trip++;
+       }
+
+       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+               acpi_trip = &tz->trips.active[i].trip;
+
+               if (!acpi_trip->valid)
+                       break;
+
+               trip->type = THERMAL_TRIP_ACTIVE;
+               trip->temperature = acpi_thermal_temp(tz, acpi_trip->temperature);
+               trip->priv = acpi_trip;
+               trip++;
+       }
+
+       tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz",
+                                                                  tz->trip_table,
+                                                                  trip_count,
+                                                                  0, tz,
+                                                                  &acpi_thermal_zone_ops,
+                                                                  NULL,
+                                                                  passive_delay,
+                                                                  tz->polling_frequency * 100);
+       if (IS_ERR(tz->thermal_zone)) {
+               result = PTR_ERR(tz->thermal_zone);
+               goto free_trip_table;
+       }
 
        result = acpi_thermal_zone_sysfs_add(tz);
        if (result)
                goto unregister_tzd;
 
-       status =  acpi_bus_attach_private_data(tz->device->handle,
-                                              tz->thermal_zone);
-       if (ACPI_FAILURE(status)) {
-               result = -ENODEV;
-               goto remove_links;
-       }
-
        result = thermal_zone_device_enable(tz->thermal_zone);
        if (result)
-               goto acpi_bus_detach;
+               goto remove_links;
 
        dev_info(&tz->device->dev, "registered as thermal_zone%d\n",
                 thermal_zone_device_id(tz->thermal_zone));
 
        return 0;
 
-acpi_bus_detach:
-       acpi_bus_detach_private_data(tz->device->handle);
 remove_links:
        acpi_thermal_zone_sysfs_remove(tz);
 unregister_tzd:
        thermal_zone_device_unregister(tz->thermal_zone);
+free_trip_table:
+       kfree(tz->trip_table);
 
        return result;
 }
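
The hunk above replaces the per-callback trip handling with a trip table built up front. For reference, a minimal, hedged sketch of the same registration pattern (driver name, ops, temperatures and delays below are invented for illustration, not taken from this driver):

	#include <linux/err.h>
	#include <linux/slab.h>
	#include <linux/thermal.h>

	static int my_get_temp(struct thermal_zone_device *tzd, int *temp)
	{
		*temp = 55000;	/* fixed reading, millidegrees Celsius */
		return 0;
	}

	static struct thermal_zone_device_ops my_tz_ops = {
		.get_temp = my_get_temp,
	};

	static int my_register_tz(void *drvdata)
	{
		struct thermal_zone_device *tzd;
		struct thermal_trip *trips;

		trips = kcalloc(1, sizeof(*trips), GFP_KERNEL);
		if (!trips)
			return -ENOMEM;

		trips[0].type = THERMAL_TRIP_PASSIVE;
		trips[0].temperature = 75000;	/* 75 C */

		/* 100 ms passive delay, 1 s polling delay */
		tzd = thermal_zone_device_register_with_trips("my_zone", trips,
							      1, 0, drvdata,
							      &my_tz_ops, NULL,
							      100, 1000);
		if (IS_ERR(tzd)) {
			kfree(trips);
			return PTR_ERR(tzd);
		}
		return 0;
	}

As in the ACPI driver, the trip array must stay allocated for the lifetime of the zone and is freed only after thermal_zone_device_unregister().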
@@ -810,8 +785,8 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz)
 {
        acpi_thermal_zone_sysfs_remove(tz);
        thermal_zone_device_unregister(tz->thermal_zone);
+       kfree(tz->trip_table);
        tz->thermal_zone = NULL;
-       acpi_bus_detach_private_data(tz->device->handle);
 }
 
 
@@ -819,14 +794,9 @@ static void acpi_thermal_unregister_thermal_zone(struct acpi_thermal *tz)
                                  Driver Interface
    -------------------------------------------------------------------------- */
 
-static void acpi_queue_thermal_check(struct acpi_thermal *tz)
-{
-       if (!work_pending(&tz->thermal_check_work))
-               queue_work(acpi_thermal_pm_queue, &tz->thermal_check_work);
-}
-
-static void acpi_thermal_notify(struct acpi_device *device, u32 event)
+static void acpi_thermal_notify(acpi_handle handle, u32 event, void *data)
 {
+       struct acpi_device *device = data;
        struct acpi_thermal *tz = acpi_driver_data(device);
 
        if (!tz)
@@ -837,16 +807,8 @@ static void acpi_thermal_notify(struct acpi_device *device, u32 event)
                acpi_queue_thermal_check(tz);
                break;
        case ACPI_THERMAL_NOTIFY_THRESHOLDS:
-               acpi_thermal_trips_update(tz, ACPI_TRIPS_THRESHOLDS);
-               acpi_queue_thermal_check(tz);
-               acpi_bus_generate_netlink_event(device->pnp.device_class,
-                                               dev_name(&device->dev), event, 0);
-               break;
        case ACPI_THERMAL_NOTIFY_DEVICES:
-               acpi_thermal_trips_update(tz, ACPI_TRIPS_DEVICES);
-               acpi_queue_thermal_check(tz);
-               acpi_bus_generate_netlink_event(device->pnp.device_class,
-                                               dev_name(&device->dev), event, 0);
+               acpi_thermal_trips_update(tz, event);
                break;
        default:
                acpi_handle_debug(device->handle, "Unsupported event [0x%x]\n",
@@ -997,11 +959,20 @@ static int acpi_thermal_add(struct acpi_device *device)
 
        pr_info("%s [%s] (%ld C)\n", acpi_device_name(device),
                acpi_device_bid(device), deci_kelvin_to_celsius(tz->temperature));
-       goto end;
 
+       result = acpi_dev_install_notify_handler(device, ACPI_DEVICE_NOTIFY,
+                                                acpi_thermal_notify);
+       if (result)
+               goto flush_wq;
+
+       return 0;
+
+flush_wq:
+       flush_workqueue(acpi_thermal_pm_queue);
+       acpi_thermal_unregister_thermal_zone(tz);
 free_memory:
        kfree(tz);
-end:
+
        return result;
 }
 
@@ -1012,10 +983,14 @@ static void acpi_thermal_remove(struct acpi_device *device)
        if (!device || !acpi_driver_data(device))
                return;
 
-       flush_workqueue(acpi_thermal_pm_queue);
        tz = acpi_driver_data(device);
 
+       acpi_dev_remove_notify_handler(device, ACPI_DEVICE_NOTIFY,
+                                      acpi_thermal_notify);
+
+       flush_workqueue(acpi_thermal_pm_queue);
        acpi_thermal_unregister_thermal_zone(tz);
+
        kfree(tz);
 }
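
The driver now installs its own notify handler instead of relying on the acpi_driver .notify callback. A hedged sketch of the install/remove pairing (all "my_" names are invented; the handler receives the struct acpi_device via the data pointer, as the converted acpi_thermal_notify() above assumes):

	#include <linux/acpi.h>

	static void my_notify(acpi_handle handle, u32 event, void *data)
	{
		struct acpi_device *adev = data;

		acpi_handle_debug(adev->handle, "event 0x%x\n", event);
	}

	static int my_add(struct acpi_device *adev)
	{
		return acpi_dev_install_notify_handler(adev, ACPI_DEVICE_NOTIFY,
						       my_notify);
	}

	static void my_remove(struct acpi_device *adev)
	{
		acpi_dev_remove_notify_handler(adev, ACPI_DEVICE_NOTIFY,
					       my_notify);
	}

Removing the handler before flushing the workqueue, as done in acpi_thermal_remove() above, ensures no new thermal checks can be queued while teardown is in progress.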
 
@@ -1030,7 +1005,7 @@ static int acpi_thermal_suspend(struct device *dev)
 static int acpi_thermal_resume(struct device *dev)
 {
        struct acpi_thermal *tz;
-       int i, j, power_state, result;
+       int i, j, power_state;
 
        if (!dev)
                return -EINVAL;
@@ -1040,18 +1015,12 @@ static int acpi_thermal_resume(struct device *dev)
                return -EINVAL;
 
        for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
-               if (!tz->trips.active[i].valid)
+               if (!tz->trips.active[i].trip.valid)
                        break;
 
-               tz->trips.active[i].enabled = true;
                for (j = 0; j < tz->trips.active[i].devices.count; j++) {
-                       result = acpi_bus_update_power(
-                                       tz->trips.active[i].devices.handles[j],
-                                       &power_state);
-                       if (result || (power_state != ACPI_STATE_D0)) {
-                               tz->trips.active[i].enabled = false;
-                               break;
-                       }
+                       acpi_bus_update_power(tz->trips.active[i].devices.handles[j],
+                                             &power_state);
                }
        }
 
@@ -1078,7 +1047,6 @@ static struct acpi_driver acpi_thermal_driver = {
        .ops = {
                .add = acpi_thermal_add,
                .remove = acpi_thermal_remove,
-               .notify = acpi_thermal_notify,
                },
        .drv.pm = &acpi_thermal_pm,
 };
@@ -1094,7 +1062,7 @@ static int thermal_act(const struct dmi_system_id *d) {
 static int thermal_nocrt(const struct dmi_system_id *d) {
        pr_notice("%s detected: disabling all critical thermal trip point actions.\n",
                  d->ident);
-       nocrt = 1;
+       crt = -1;
        return 0;
 }
 static int thermal_tzp(const struct dmi_system_id *d) {
index 18cc08c..442396f 100644 (file)
@@ -446,6 +446,15 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                },
        },
        {
+        /* https://bugzilla.suse.com/show_bug.cgi?id=1208724 */
+        .callback = video_detect_force_native,
+        /* Lenovo Ideapad Z470 */
+        .matches = {
+               DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+               DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad Z470"),
+               },
+       },
+       {
         /* https://bugzilla.redhat.com/show_bug.cgi?id=1187004 */
         .callback = video_detect_force_native,
         /* Lenovo Ideapad Z570 */
@@ -487,6 +496,24 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                },
        },
        {
+        /* https://gitlab.freedesktop.org/drm/amd/-/issues/1838 */
+        .callback = video_detect_force_native,
+        /* Apple iMac12,1 */
+        .matches = {
+               DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+               DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,1"),
+               },
+       },
+       {
+        /* https://gitlab.freedesktop.org/drm/amd/-/issues/2753 */
+        .callback = video_detect_force_native,
+        /* Apple iMac12,2 */
+        .matches = {
+               DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+               DMI_MATCH(DMI_PRODUCT_NAME, "iMac12,2"),
+               },
+       },
+       {
         /* https://bugzilla.redhat.com/show_bug.cgi?id=1217249 */
         .callback = video_detect_force_native,
         /* Apple MacBook Pro 12,1 */
index ce62e61..08f7c67 100644 (file)
@@ -94,6 +94,11 @@ static struct lpi_constraints *lpi_constraints_table;
 static int lpi_constraints_table_size;
 static int rev_id;
 
+#define for_each_lpi_constraint(entry)                                         \
+       for (int i = 0;                                                         \
+            entry = &lpi_constraints_table[i], i < lpi_constraints_table_size; \
+            i++)
+
 static void lpi_device_get_constraints_amd(void)
 {
        union acpi_object *out_obj;
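
The for_each_lpi_constraint() macro leans on the C comma operator: the entry pointer is refreshed at the top of every iteration, then the index bound is evaluated as the loop condition. A hedged, self-contained demonstration of the same pattern (table and names invented):

	#include <stdio.h>

	struct item {
		int val;
	};

	static struct item table[] = { { 1 }, { 2 }, { 3 } };
	static int table_size = 3;

	/* entry is reassigned before the bound check on each pass */
	#define for_each_item(entry) \
		for (int i = 0; entry = &table[i], i < table_size; i++)

	int main(void)
	{
		struct item *entry;

		for_each_item(entry)
			printf("%d\n", entry->val);
		return 0;
	}

Forming &table[table_size] on the final test is legal (one-past-the-end address); the pointer is never dereferenced once the condition fails.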
@@ -113,6 +118,12 @@ static void lpi_device_get_constraints_amd(void)
                union acpi_object *package = &out_obj->package.elements[i];
 
                if (package->type == ACPI_TYPE_PACKAGE) {
+                       if (lpi_constraints_table) {
+                               acpi_handle_err(lps0_device_handle,
+                                               "Duplicate constraints list\n");
+                               goto free_acpi_buffer;
+                       }
+
                        lpi_constraints_table = kcalloc(package->package.count,
                                                        sizeof(*lpi_constraints_table),
                                                        GFP_KERNEL);
@@ -123,17 +134,16 @@ static void lpi_device_get_constraints_amd(void)
                        acpi_handle_debug(lps0_device_handle,
                                          "LPI: constraints list begin:\n");
 
-                       for (j = 0; j < package->package.count; ++j) {
+                       for (j = 0; j < package->package.count; j++) {
                                union acpi_object *info_obj = &package->package.elements[j];
                                struct lpi_device_constraint_amd dev_info = {};
                                struct lpi_constraints *list;
                                acpi_status status;
 
-                               for (k = 0; k < info_obj->package.count; ++k) {
-                                       union acpi_object *obj = &info_obj->package.elements[k];
+                               list = &lpi_constraints_table[lpi_constraints_table_size];
 
-                                       list = &lpi_constraints_table[lpi_constraints_table_size];
-                                       list->min_dstate = -1;
+                               for (k = 0; k < info_obj->package.count; k++) {
+                                       union acpi_object *obj = &info_obj->package.elements[k];
 
                                        switch (k) {
                                        case 0:
@@ -149,27 +159,25 @@ static void lpi_device_get_constraints_amd(void)
                                                dev_info.min_dstate = obj->integer.value;
                                                break;
                                        }
+                               }
 
-                                       if (!dev_info.enabled || !dev_info.name ||
-                                           !dev_info.min_dstate)
-                                               continue;
+                               acpi_handle_debug(lps0_device_handle,
+                                                 "Name:%s, Enabled: %d, States: %d, MinDstate: %d\n",
+                                                 dev_info.name,
+                                                 dev_info.enabled,
+                                                 dev_info.function_states,
+                                                 dev_info.min_dstate);
 
-                                       status = acpi_get_handle(NULL, dev_info.name,
-                                                                &list->handle);
-                                       if (ACPI_FAILURE(status))
-                                               continue;
+                               if (!dev_info.enabled || !dev_info.name ||
+                                   !dev_info.min_dstate)
+                                       continue;
 
-                                       acpi_handle_debug(lps0_device_handle,
-                                                         "Name:%s\n", dev_info.name);
+                               status = acpi_get_handle(NULL, dev_info.name, &list->handle);
+                               if (ACPI_FAILURE(status))
+                                       continue;
 
-                                       list->min_dstate = dev_info.min_dstate;
+                               list->min_dstate = dev_info.min_dstate;
 
-                                       if (list->min_dstate < 0) {
-                                               acpi_handle_debug(lps0_device_handle,
-                                                                 "Incomplete constraint defined\n");
-                                               continue;
-                                       }
-                               }
                                lpi_constraints_table_size++;
                        }
                }
@@ -214,7 +222,7 @@ static void lpi_device_get_constraints(void)
                if (!package)
                        continue;
 
-               for (j = 0; j < package->package.count; ++j) {
+               for (j = 0; j < package->package.count; j++) {
                        union acpi_object *element =
                                        &(package->package.elements[j]);
 
@@ -246,7 +254,7 @@ static void lpi_device_get_constraints(void)
 
                constraint->min_dstate = -1;
 
-               for (j = 0; j < package_count; ++j) {
+               for (j = 0; j < package_count; j++) {
                        union acpi_object *info_obj = &info.package[j];
                        union acpi_object *cnstr_pkg;
                        union acpi_object *obj;
@@ -291,32 +299,55 @@ free_acpi_buffer:
        ACPI_FREE(out_obj);
 }
 
+/**
+ * acpi_get_lps0_constraint - Get the LPS0 constraint for a device.
+ * @adev: Device to get the constraint for.
+ *
+ * The LPS0 constraint is the shallowest (minimum) power state that the
+ * device can be in while still allowing the platform as a whole to achieve
+ * additional energy conservation by utilizing a system-wide low-power state.
+ *
+ * Returns:
+ *  - ACPI power state value of the constraint for @adev on success.
+ *  - Otherwise, ACPI_STATE_UNKNOWN.
+ */
+int acpi_get_lps0_constraint(struct acpi_device *adev)
+{
+       struct lpi_constraints *entry;
+
+       for_each_lpi_constraint(entry) {
+               if (adev->handle == entry->handle)
+                       return entry->min_dstate;
+       }
+
+       return ACPI_STATE_UNKNOWN;
+}
+
 static void lpi_check_constraints(void)
 {
-       int i;
+       struct lpi_constraints *entry;
 
-       for (i = 0; i < lpi_constraints_table_size; ++i) {
-               acpi_handle handle = lpi_constraints_table[i].handle;
-               struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
+       for_each_lpi_constraint(entry) {
+               struct acpi_device *adev = acpi_fetch_acpi_dev(entry->handle);
 
                if (!adev)
                        continue;
 
-               acpi_handle_debug(handle,
+               acpi_handle_debug(entry->handle,
                        "LPI: required min power state:%s current power state:%s\n",
-                       acpi_power_state_string(lpi_constraints_table[i].min_dstate),
+                       acpi_power_state_string(entry->min_dstate),
                        acpi_power_state_string(adev->power.state));
 
                if (!adev->flags.power_manageable) {
-                       acpi_handle_info(handle, "LPI: Device not power manageable\n");
-                       lpi_constraints_table[i].handle = NULL;
+                       acpi_handle_info(entry->handle, "LPI: Device not power manageable\n");
+                       entry->handle = NULL;
                        continue;
                }
 
-               if (adev->power.state < lpi_constraints_table[i].min_dstate)
-                       acpi_handle_info(handle,
+               if (adev->power.state < entry->min_dstate)
+                       acpi_handle_info(entry->handle,
                                "LPI: Constraint not met; min power state:%s current power state:%s\n",
-                               acpi_power_state_string(lpi_constraints_table[i].min_dstate),
+                               acpi_power_state_string(entry->min_dstate),
                                acpi_power_state_string(adev->power.state));
        }
 }
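
A hedged sketch of how a consumer might use the new accessor, assuming a kernel that exposes acpi_get_lps0_constraint() as added above (the D3hot fallback is an invented policy, not part of this patch):

	#include <linux/acpi.h>

	static int my_target_state(struct acpi_device *adev)
	{
		int constraint = acpi_get_lps0_constraint(adev);

		/* no constraint known: fall back to a deep default */
		if (constraint == ACPI_STATE_UNKNOWN)
			return ACPI_STATE_D3_HOT;

		return constraint;
	}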
index c2b925f..63d834d 100644 (file)
@@ -518,3 +518,38 @@ bool acpi_quirk_skip_acpi_ac_and_battery(void)
        return false;
 }
 EXPORT_SYMBOL_GPL(acpi_quirk_skip_acpi_ac_and_battery);
+
+/*
+ * This section provides a workaround for a specific x86 system
+ * which requires disabling of mwait to work correctly.
+ */
+static int __init acpi_proc_quirk_set_no_mwait(const struct dmi_system_id *id)
+{
+       pr_notice("%s detected - disabling mwait for CPU C-states\n",
+                 id->ident);
+       boot_option_idle_override = IDLE_NOMWAIT;
+       return 0;
+}
+
+static const struct dmi_system_id acpi_proc_quirk_mwait_dmi_table[] __initconst = {
+       {
+               .callback = acpi_proc_quirk_set_no_mwait,
+               .ident = "Extensa 5220",
+               .matches =  {
+                       DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "0100"),
+                       DMI_MATCH(DMI_BOARD_NAME, "Columbia"),
+               },
+               .driver_data = NULL,
+       },
+       {}
+};
+
+void __init acpi_proc_quirk_mwait_check(void)
+{
+       /*
+        * Check whether the system matches an entry in the DMI table.
+        * If so, OSPM should not use mwait for CPU C-states.
+        */
+       dmi_check_system(acpi_proc_quirk_mwait_dmi_table);
+}
index 486c827..d720f93 100644 (file)
@@ -6617,6 +6617,7 @@ err_init_binder_device_failed:
 
 err_alloc_device_names_failed:
        debugfs_remove_recursive(binder_debugfs_dir_entry_root);
+       binder_alloc_shrinker_exit();
 
        return ret;
 }
index 662a2a2..e3db829 100644 (file)
@@ -1087,6 +1087,12 @@ int binder_alloc_shrinker_init(void)
        return ret;
 }
 
+void binder_alloc_shrinker_exit(void)
+{
+       unregister_shrinker(&binder_shrinker);
+       list_lru_destroy(&binder_alloc_lru);
+}
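
The new binder_alloc_shrinker_exit() gives the init error path a way to undo binder_alloc_shrinker_init(). A hedged sketch of the general register/unregister pairing with the shrinker API as of this kernel (names and the trivial callbacks are invented):

	#include <linux/shrinker.h>

	static unsigned long my_count(struct shrinker *s,
				      struct shrink_control *sc)
	{
		return 0;	/* nothing cached in this sketch */
	}

	static unsigned long my_scan(struct shrinker *s,
				     struct shrink_control *sc)
	{
		return SHRINK_STOP;
	}

	static struct shrinker my_shrinker = {
		.count_objects = my_count,
		.scan_objects = my_scan,
		.seeks = DEFAULT_SEEKS,
	};

	static int __init my_init(void)
	{
		return register_shrinker(&my_shrinker, "my-shrinker");
	}

	static void __exit my_exit(void)
	{
		unregister_shrinker(&my_shrinker);
	}

Without the unregister on the failure path, the global shrinker list would keep pointing at state owned by code that never finished initializing.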
+
 /**
  * check_buffer() - verify that buffer/offset is safe to access
  * @alloc: binder_alloc for this proc
index 138d1d5..dc1e2b0 100644 (file)
@@ -129,6 +129,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
                                                  int pid);
 extern void binder_alloc_init(struct binder_alloc *alloc);
 extern int binder_alloc_shrinker_init(void);
+extern void binder_alloc_shrinker_exit(void);
 extern void binder_alloc_vma_close(struct binder_alloc *alloc);
 extern struct binder_buffer *
 binder_alloc_prepare_to_free(struct binder_alloc *alloc,
index 76e7d66..faebe9f 100644 (file)
@@ -153,7 +153,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
                goto err;
 
        inode->i_ino = minor + INODE_OFFSET;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        init_special_inode(inode, S_IFCHR | 0600,
                           MKDEV(MAJOR(binderfs_dev), minor));
        inode->i_fop = &binder_fops;
@@ -432,7 +432,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
        }
 
        inode->i_ino = SECOND_INODE;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        init_special_inode(inode, S_IFCHR | 0600,
                           MKDEV(MAJOR(binderfs_dev), minor));
        inode->i_fop = &binder_ctl_fops;
@@ -474,7 +474,7 @@ static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
        if (ret) {
                ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
                ret->i_mode = mode;
-               ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+               ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
        }
        return ret;
 }
@@ -703,7 +703,7 @@ static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
        inode->i_ino = FIRST_INODE;
        inode->i_fop = &simple_dir_operations;
        inode->i_mode = S_IFDIR | 0755;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_op = &binderfs_dir_inode_operations;
        set_nlink(inode, 2);
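
These conversions all follow the same shape: inode_set_ctime_current() both stores the current time in the inode's ctime and returns it, so the old three-way assignment chain survives with one call. A hedged one-liner illustrating the pattern (helper name invented):

	#include <linux/fs.h>

	static void my_touch(struct inode *inode)
	{
		inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
	}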
 
index d37ab60..04db0f2 100644 (file)
@@ -4938,8 +4938,8 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
                if (qc->result_tf.status & ATA_SENSE &&
                    ((ata_is_ncq(qc->tf.protocol) &&
                      dev->flags & ATA_DFLAG_CDL_ENABLED) ||
-                    (!(ata_is_ncq(qc->tf.protocol) &&
-                       ata_id_sense_reporting_enabled(dev->id))))) {
+                    (!ata_is_ncq(qc->tf.protocol) &&
+                     ata_id_sense_reporting_enabled(dev->id)))) {
                        /*
                         * Tell SCSI EH to not overwrite scmd->result even if
                         * this command is finished with result SAM_STAT_GOOD.
index 370d18a..c6ece32 100644 (file)
@@ -1100,7 +1100,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
                }
        } else {
                sdev->sector_size = ata_id_logical_sector_size(dev->id);
+               /*
+                * Stop the drive on suspend but do not issue START STOP UNIT
+                * on resume as this is not necessary and may fail: the device
+                * will be woken up by ata_port_pm_resume() with a port reset
+                * and device revalidation.
+                */
                sdev->manage_start_stop = 1;
+               sdev->no_start_on_resume = 1;
        }
 
        /*
index 6ab2943..314eaa1 100644 (file)
@@ -529,7 +529,8 @@ static void data_xfer(struct work_struct *work)
        /* dma_request_channel may sleep, so calling from process context */
        acdev->dma_chan = dma_request_chan(acdev->host->dev, "data");
        if (IS_ERR(acdev->dma_chan)) {
-               dev_err(acdev->host->dev, "Unable to get dma_chan\n");
+               dev_err_probe(acdev->host->dev, PTR_ERR(acdev->dma_chan),
+                             "Unable to get dma_chan\n");
                acdev->dma_chan = NULL;
                goto chan_request_fail;
        }
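
dev_err_probe() logs the message (quietly for -EPROBE_DEFER, which dma_request_chan() can return) and hands back the error code, so it also supports the more compact "return dev_err_probe(...)" form. A hedged sketch with invented names:

	#include <linux/device.h>
	#include <linux/dmaengine.h>
	#include <linux/err.h>

	static int my_request_dma(struct device *dev, struct dma_chan **chan)
	{
		*chan = dma_request_chan(dev, "data");
		if (IS_ERR(*chan))
			return dev_err_probe(dev, PTR_ERR(*chan),
					     "Unable to get dma_chan\n");
		return 0;
	}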
index d60e1f6..c697219 100644 (file)
@@ -260,7 +260,7 @@ static u8 ns87560_check_status(struct ata_port *ap)
  *     LOCKING:
  *     Inherited from caller.
  */
-void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+static void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
 {
        struct ata_ioports *ioaddr = &ap->ioaddr;
 
index ff538b8..2884acf 100644 (file)
@@ -815,8 +815,8 @@ static int octeon_cf_probe(struct platform_device *pdev)
        irq_handler_t irq_handler = NULL;
        void __iomem *base;
        struct octeon_cf_port *cf_port;
-       int rv = -ENOMEM;
        u32 bus_width;
+       int rv;
 
        node = pdev->dev.of_node;
        if (node == NULL)
@@ -893,12 +893,12 @@ static int octeon_cf_probe(struct platform_device *pdev)
        cs0 = devm_ioremap(&pdev->dev, res_cs0->start,
                                   resource_size(res_cs0));
        if (!cs0)
-               return rv;
+               return -ENOMEM;
 
        /* allocate host */
        host = ata_host_alloc(&pdev->dev, 1);
        if (!host)
-               return rv;
+               return -ENOMEM;
 
        ap = host->ports[0];
        ap->private_data = cf_port;
index 8328a49..620ce6c 100644 (file)
@@ -139,4 +139,6 @@ static struct pi_protocol aten = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("ATEN EH-100 parallel port IDE adapter protocol driver");
 module_pata_parport_driver(aten);
index 9f4309f..bba1eda 100644 (file)
@@ -502,4 +502,6 @@ static struct pi_protocol bpck = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("MicroSolutions BACKPACK parallel port IDE adapter protocol driver");
 module_pata_parport_driver(bpck);
index c6dbd14..62c2b53 100644 (file)
@@ -459,5 +459,6 @@ static struct pi_protocol bpck6 = {
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Micro Solutions Inc.");
-MODULE_DESCRIPTION("BACKPACK Protocol module, compatible with PARIDE");
+MODULE_DESCRIPTION("Micro Solutions BACKPACK parallel port IDE adapter "
+                  "(version 6 drives) protocol driver");
 module_pata_parport_driver(bpck6);
index cc5485b..4839bec 100644 (file)
@@ -201,4 +201,6 @@ static struct pi_protocol comm = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("DataStor Commuter parallel port IDE adapter protocol driver");
 module_pata_parport_driver(comm);
index 368d7c7..88930bb 100644 (file)
@@ -230,4 +230,6 @@ static struct pi_protocol dstr = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("DataStor EP2000 parallel port IDE adapter protocol driver");
 module_pata_parport_driver(dstr);
index 016bd96..3cb54fc 100644 (file)
@@ -358,5 +358,8 @@ static void __exit epat_exit(void)
 }
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Shuttle Technologies EPAT parallel port IDE adapter "
+                  "protocol driver");
 module_init(epat_init)
 module_exit(epat_exit)
index 920e9f4..7aaba47 100644 (file)
@@ -306,4 +306,7 @@ static struct pi_protocol epia = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Shuttle Technologies EPIA parallel port IDE adapter "
+                  "protocol driver");
 module_pata_parport_driver(epia);
index 6524f30..de79cf9 100644 (file)
@@ -132,4 +132,7 @@ static struct pi_protocol fit2 = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Fidelity International Technology parallel port IDE adapter"
+                  "(older models) protocol driver");
 module_pata_parport_driver(fit2);
index c172a38..bad7aa9 100644 (file)
@@ -193,4 +193,7 @@ static struct pi_protocol fit3 = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Fidelity International Technology parallel port IDE adapter"
+                  "(newer models) protocol driver");
 module_pata_parport_driver(fit3);
index dc428f5..7abe2ff 100644 (file)
@@ -259,4 +259,6 @@ static struct pi_protocol friq = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Freecom IQ parallel port IDE adapter protocol driver");
 module_pata_parport_driver(friq);
index 28d9bb2..7fa9b98 100644 (file)
@@ -293,4 +293,6 @@ static struct pi_protocol frpw = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Freecom Power parallel port IDE adapter protocol driver");
 module_pata_parport_driver(frpw);
index 6023e07..fca3226 100644 (file)
@@ -301,5 +301,8 @@ static void __exit kbic_exit(void)
 }
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("KingByte Information Systems KBIC-951A and KBIC-971A "
+                  "parallel port IDE adapter protocol driver");
 module_init(kbic_init)
 module_exit(kbic_exit)
index bca6c20..c078d19 100644 (file)
@@ -106,4 +106,6 @@ static struct pi_protocol ktti = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("KT Technology parallel port IDE adapter protocol driver");
 module_pata_parport_driver(ktti);
index 34e69da..7c70e5b 100644 (file)
@@ -142,4 +142,6 @@ static struct pi_protocol on20 = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Onspec 90c20 parallel port IDE adapter protocol driver");
 module_pata_parport_driver(on20);
index 5da317b..c88e5d6 100644 (file)
@@ -310,4 +310,6 @@ static struct pi_protocol on26 = {
 };
 
 MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Onspec 90c26 parallel port IDE adapter protocol driver");
 module_pata_parport_driver(on26);
index c1815b9..fe6690e 100644 (file)
@@ -509,73 +509,30 @@ static void __init cpu_dev_register_generic(void)
 }
 
 #ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
-
-ssize_t __weak cpu_show_meltdown(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spectre_v1(struct device *dev,
-                                  struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spectre_v2(struct device *dev,
-                                  struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
-                                         struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_l1tf(struct device *dev,
-                            struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_mds(struct device *dev,
-                           struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
-                                       struct device_attribute *attr,
-                                       char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
-                                     struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_srbds(struct device *dev,
+static ssize_t cpu_show_not_affected(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
        return sysfs_emit(buf, "Not affected\n");
 }
 
-ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
-                                       struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_retbleed(struct device *dev,
-                                struct device_attribute *attr, char *buf)
-{
-       return sysfs_emit(buf, "Not affected\n");
-}
+#define CPU_SHOW_VULN_FALLBACK(func)                                   \
+       ssize_t cpu_show_##func(struct device *,                        \
+                                 struct device_attribute *, char *)    \
+                __attribute__((weak, alias("cpu_show_not_affected")))
+
+CPU_SHOW_VULN_FALLBACK(meltdown);
+CPU_SHOW_VULN_FALLBACK(spectre_v1);
+CPU_SHOW_VULN_FALLBACK(spectre_v2);
+CPU_SHOW_VULN_FALLBACK(spec_store_bypass);
+CPU_SHOW_VULN_FALLBACK(l1tf);
+CPU_SHOW_VULN_FALLBACK(mds);
+CPU_SHOW_VULN_FALLBACK(tsx_async_abort);
+CPU_SHOW_VULN_FALLBACK(itlb_multihit);
+CPU_SHOW_VULN_FALLBACK(srbds);
+CPU_SHOW_VULN_FALLBACK(mmio_stale_data);
+CPU_SHOW_VULN_FALLBACK(retbleed);
+CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow);
+CPU_SHOW_VULN_FALLBACK(gds);
 
 static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
 static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -588,6 +545,8 @@ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
 static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
 static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
 static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
+static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
 
 static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_meltdown.attr,
@@ -601,6 +560,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
        &dev_attr_srbds.attr,
        &dev_attr_mmio_stale_data.attr,
        &dev_attr_retbleed.attr,
+       &dev_attr_spec_rstack_overflow.attr,
+       &dev_attr_gather_data_sampling.attr,
        NULL
 };
 
index 0eb7f02..922ed45 100644 (file)
@@ -29,6 +29,7 @@ extern u64 pm_runtime_active_time(struct device *dev);
 #define WAKE_IRQ_DEDICATED_MASK                (WAKE_IRQ_DEDICATED_ALLOCATED | \
                                         WAKE_IRQ_DEDICATED_MANAGED | \
                                         WAKE_IRQ_DEDICATED_REVERSE)
+#define WAKE_IRQ_DEDICATED_ENABLED     BIT(3)
 
 struct wake_irq {
        struct device *dev;
index d487a6b..42171f7 100644 (file)
@@ -194,7 +194,6 @@ err_free:
        return err;
 }
 
-
 /**
  * dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt
  * @dev: Device entry
@@ -206,11 +205,6 @@ err_free:
  * Sets up a threaded interrupt handler for a device that has
  * a dedicated wake-up interrupt in addition to the device IO
  * interrupt.
- *
- * The interrupt starts disabled, and needs to be managed for
- * the device by the bus code or the device driver using
- * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
- * functions.
  */
 int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
 {
@@ -232,11 +226,6 @@ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq);
  * the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend()
  * to enable dedicated wake-up interrupt after running the runtime suspend
  * callback for @dev.
- *
- * The interrupt starts disabled, and needs to be managed for
- * the device by the bus code or the device driver using
- * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
- * functions.
  */
 int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
 {
@@ -245,44 +234,6 @@ int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
 EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse);
 
 /**
- * dev_pm_enable_wake_irq - Enable device wake-up interrupt
- * @dev: Device
- *
- * Optionally called from the bus code or the device driver for
- * runtime_resume() to override the PM runtime core managed wake-up
- * interrupt handling to enable the wake-up interrupt.
- *
- * Note that for runtime_suspend()) the wake-up interrupts
- * should be unconditionally enabled unlike for suspend()
- * that is conditional.
- */
-void dev_pm_enable_wake_irq(struct device *dev)
-{
-       struct wake_irq *wirq = dev->power.wakeirq;
-
-       if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED))
-               enable_irq(wirq->irq);
-}
-EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq);
-
-/**
- * dev_pm_disable_wake_irq - Disable device wake-up interrupt
- * @dev: Device
- *
- * Optionally called from the bus code or the device driver for
- * runtime_suspend() to override the PM runtime core managed wake-up
- * interrupt handling to disable the wake-up interrupt.
- */
-void dev_pm_disable_wake_irq(struct device *dev)
-{
-       struct wake_irq *wirq = dev->power.wakeirq;
-
-       if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED))
-               disable_irq_nosync(wirq->irq);
-}
-EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq);
-
-/**
  * dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt
  * @dev: Device
  * @can_change_status: Can change wake-up interrupt status
@@ -314,8 +265,10 @@ void dev_pm_enable_wake_irq_check(struct device *dev,
        return;
 
 enable:
-       if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
+       if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) {
                enable_irq(wirq->irq);
+               wirq->status |= WAKE_IRQ_DEDICATED_ENABLED;
+       }
 }
 
 /**
@@ -336,8 +289,10 @@ void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable)
        if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
                return;
 
-       if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED)
+       if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) {
+               wirq->status &= ~WAKE_IRQ_DEDICATED_ENABLED;
                disable_irq_nosync(wirq->irq);
+       }
 }
 
 /**
@@ -376,7 +331,7 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq)
 
        if (device_may_wakeup(wirq->dev)) {
                if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
-                   !pm_runtime_status_suspended(wirq->dev))
+                   !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED))
                        enable_irq(wirq->irq);
 
                enable_irq_wake(wirq->irq);
@@ -399,7 +354,7 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
                disable_irq_wake(wirq->irq);
 
                if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
-                   !pm_runtime_status_suspended(wirq->dev))
+                   !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED))
                        disable_irq_nosync(wirq->irq);
        }
 }
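
The new WAKE_IRQ_DEDICATED_ENABLED bit exists to keep enable_irq()/disable_irq_nosync() calls balanced: the IRQ core maintains a disable depth, so a second enable or disable from the arm/disarm paths would skew it. A hedged sketch of the tracking idea with a simplified struct (names mirror the hunks above but are not the driver's types):

	#include <linux/bits.h>
	#include <linux/interrupt.h>

	#define MY_IRQ_ENABLED	BIT(3)

	struct my_wake_irq {
		int irq;
		unsigned int status;
	};

	static void my_wake_irq_enable(struct my_wake_irq *w)
	{
		if (!(w->status & MY_IRQ_ENABLED)) {
			enable_irq(w->irq);
			w->status |= MY_IRQ_ENABLED;
		}
	}

	static void my_wake_irq_disable(struct my_wake_irq *w)
	{
		if (w->status & MY_IRQ_ENABLED) {
			w->status &= ~MY_IRQ_ENABLED;
			disable_irq_nosync(w->irq);
		}
	}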
index fabf870..584bcc5 100644 (file)
@@ -471,6 +471,8 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min,
        unsigned int start, end;
        int ret;
 
+       map->async = true;
+
        rbtree_ctx = map->cache;
        for (node = rb_first(&rbtree_ctx->root); node; node = rb_next(node)) {
                rbnode = rb_entry(node, struct regcache_rbtree_node, node);
@@ -499,6 +501,8 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min,
                        return ret;
        }
 
+       map->async = false;
+
        return regmap_async_complete(map);
 }
 
index 28bc3ae..7d3e474 100644 (file)
@@ -368,8 +368,6 @@ int regcache_sync(struct regmap *map)
        if (!map->cache_dirty)
                goto out;
 
-       map->async = true;
-
        /* Apply any patch first */
        map->cache_bypass = true;
        for (i = 0; i < map->patch_regs; i++) {
@@ -392,7 +390,6 @@ int regcache_sync(struct regmap *map)
 
 out:
        /* Restore the bypass state */
-       map->async = false;
        map->cache_bypass = bypass;
        map->no_sync_defaults = false;
        map->unlock(map->lock_arg);
index 980e5ce..3ec611d 100644 (file)
@@ -242,8 +242,8 @@ static int regmap_i2c_smbus_i2c_read(void *context, const void *reg,
 static const struct regmap_bus regmap_i2c_smbus_i2c_block = {
        .write = regmap_i2c_smbus_i2c_write,
        .read = regmap_i2c_smbus_i2c_read,
-       .max_raw_read = I2C_SMBUS_BLOCK_MAX,
-       .max_raw_write = I2C_SMBUS_BLOCK_MAX,
+       .max_raw_read = I2C_SMBUS_BLOCK_MAX - 1,
+       .max_raw_write = I2C_SMBUS_BLOCK_MAX - 1,
 };
 
 static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data,
@@ -299,8 +299,8 @@ static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg,
 static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = {
        .write = regmap_i2c_smbus_i2c_write_reg16,
        .read = regmap_i2c_smbus_i2c_read_reg16,
-       .max_raw_read = I2C_SMBUS_BLOCK_MAX,
-       .max_raw_write = I2C_SMBUS_BLOCK_MAX,
+       .max_raw_read = I2C_SMBUS_BLOCK_MAX - 2,
+       .max_raw_write = I2C_SMBUS_BLOCK_MAX - 2,
 };
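
The arithmetic behind these limits: an SMBus block transfer carries at most I2C_SMBUS_BLOCK_MAX (32) bytes, and the register address travels inside that budget, so the usable value payload shrinks by the address width. A hedged helper expressing the rule (invented name):

	#include <linux/i2c.h>

	static size_t my_max_payload(size_t reg_bytes)
	{
		/* 31 bytes for 8-bit registers, 30 for 16-bit registers */
		return I2C_SMBUS_BLOCK_MAX - reg_bytes;
	}

The matching regmap core change further down stops subtracting the register size a second time when chunking raw writes, since max_raw_write now already describes payload only.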
 
 static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
index ced0dcf..45fd13e 100644 (file)
@@ -717,7 +717,7 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode,
                if (!d->config_buf)
                        goto err_alloc;
 
-               for (i = 0; i < chip->num_config_regs; i++) {
+               for (i = 0; i < chip->num_config_bases; i++) {
                        d->config_buf[i] = kcalloc(chip->num_config_regs,
                                                   sizeof(**d->config_buf),
                                                   GFP_KERNEL);
index 24257aa..9ff3018 100644 (file)
@@ -58,6 +58,9 @@ static struct regmap *gen_regmap(struct regmap_config *config,
        int i;
        struct reg_default *defaults;
 
+       config->disable_locking = config->cache_type == REGCACHE_RBTREE ||
+                                       config->cache_type == REGCACHE_MAPLE;
+
        buf = kmalloc(size, GFP_KERNEL);
        if (!buf)
                return ERR_PTR(-ENOMEM);
@@ -889,6 +892,8 @@ static struct regmap *gen_raw_regmap(struct regmap_config *config,
 
        config->cache_type = test_type->cache_type;
        config->val_format_endian = test_type->val_endian;
+       config->disable_locking = config->cache_type == REGCACHE_RBTREE ||
+                                       config->cache_type == REGCACHE_MAPLE;
 
        buf = kmalloc(size, GFP_KERNEL);
        if (!buf)
index 6af6928..4c2b94b 100644 (file)
@@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = {
        .reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
        .val_format_endian_default = REGMAP_ENDIAN_NATIVE,
        .max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT,
-       .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
+       .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
        .free_context = spi_avmm_bridge_ctx_free,
 };
 
index 89a7f1c..1bfd172 100644 (file)
@@ -2082,8 +2082,6 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
        size_t val_count = val_len / val_bytes;
        size_t chunk_count, chunk_bytes;
        size_t chunk_regs = val_count;
-       size_t max_data = map->max_raw_write - map->format.reg_bytes -
-                       map->format.pad_bytes;
        int ret, i;
 
        if (!val_count)
@@ -2091,8 +2089,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
 
        if (map->use_single_write)
                chunk_regs = 1;
-       else if (map->max_raw_write && val_len > max_data)
-               chunk_regs = max_data / val_bytes;
+       else if (map->max_raw_write && val_len > map->max_raw_write)
+               chunk_regs = map->max_raw_write / val_bytes;
 
        chunk_count = val_count / chunk_regs;
        chunk_bytes = chunk_regs * val_bytes;
index e460c97..2b98114 100644 (file)
@@ -1547,7 +1547,6 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
                        rel_fdc();
                        return -EBUSY;
                }
-               fsync_bdev(bdev);
                if (fd_motor_on(drive) == 0) {
                        rel_fdc();
                        return -ENODEV;
index 2db9b18..ea4eb88 100644 (file)
@@ -3255,7 +3255,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
 
                        if (!disk || ITYPE(drive_state[cnt].fd_device) != type)
                                continue;
-                       __invalidate_device(disk->part0, true);
+                       disk_force_media_change(disk);
                }
                mutex_unlock(&open_lock);
        } else {
index 37511d2..9f2d412 100644 (file)
@@ -603,7 +603,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
                goto out_err;
 
        /* and ... switch */
-       disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+       disk_force_media_change(lo->lo_disk);
        blk_mq_freeze_queue(lo->lo_queue);
        mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
        lo->lo_backing_file = file;
@@ -1067,7 +1067,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
        /* suppress uevents while reconfiguring the device */
        dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
 
-       disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+       disk_force_media_change(lo->lo_disk);
        set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
 
        lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
@@ -1171,7 +1171,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
        if (!release)
                blk_mq_unfreeze_queue(lo->lo_queue);
 
-       disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+       disk_force_media_change(lo->lo_disk);
 
        if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
                int err;
@@ -1775,14 +1775,43 @@ static const struct block_device_operations lo_fops = {
 /*
  * If max_loop is specified, create that many devices upfront.
  * This also becomes a hard limit. If max_loop is not specified,
+ * the default is not a hard limit (matching the behavior from before
+ * commit 85c50197716c changed the default value away from 0): just
  * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
  * init time. Loop devices can be requested on-demand with the
  * /dev/loop-control interface, or be instantiated by accessing
  * a 'dead' device node.
  */
 static int max_loop = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
-module_param(max_loop, int, 0444);
+
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
+static bool max_loop_specified;
+
+static int max_loop_param_set_int(const char *val,
+                                 const struct kernel_param *kp)
+{
+       int ret;
+
+       ret = param_set_int(val, kp);
+       if (ret < 0)
+               return ret;
+
+       max_loop_specified = true;
+       return 0;
+}
+
+static const struct kernel_param_ops max_loop_param_ops = {
+       .set = max_loop_param_set_int,
+       .get = param_get_int,
+};
+
+module_param_cb(max_loop, &max_loop_param_ops, &max_loop, 0444);
 MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
+#else
+module_param(max_loop, int, 0444);
+MODULE_PARM_DESC(max_loop, "Initial number of loop devices");
+#endif
+
 module_param(max_part, int, 0444);
 MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
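
The module_param_cb() wrapper is the standard way to learn whether a parameter was set explicitly rather than left at its default. A hedged, generic sketch of the same pattern (all names invented):

	#include <linux/module.h>
	#include <linux/moduleparam.h>

	static int my_value = 8;
	static bool my_value_specified;

	static int my_param_set(const char *val, const struct kernel_param *kp)
	{
		int ret = param_set_int(val, kp);

		if (ret < 0)
			return ret;

		my_value_specified = true;	/* user gave an explicit value */
		return 0;
	}

	static const struct kernel_param_ops my_param_ops = {
		.set = my_param_set,
		.get = param_get_int,
	};

	module_param_cb(my_value, &my_param_ops, &my_value, 0444);
	MODULE_PARM_DESC(my_value, "Illustrative integer parameter");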
 
@@ -2093,14 +2122,18 @@ static void loop_remove(struct loop_device *lo)
        put_disk(lo->lo_disk);
 }
 
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
 static void loop_probe(dev_t dev)
 {
        int idx = MINOR(dev) >> part_shift;
 
-       if (max_loop && idx >= max_loop)
+       if (max_loop_specified && max_loop && idx >= max_loop)
                return;
        loop_add(idx);
 }
+#else
+#define loop_probe NULL
+#endif /* !CONFIG_BLOCK_LEGACY_AUTOLOAD */
 
 static int loop_control_remove(int idx)
 {
@@ -2281,6 +2314,9 @@ module_exit(loop_exit);
 static int __init max_loop_setup(char *str)
 {
        max_loop = simple_strtol(str, NULL, 0);
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
+       max_loop_specified = true;
+#endif
        return 1;
 }
 
index 8576d69..42e0159 100644 (file)
@@ -1434,12 +1434,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd)
        return ret;
 }
 
-static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
-                                struct block_device *bdev)
+static void nbd_clear_sock_ioctl(struct nbd_device *nbd)
 {
+       blk_mark_disk_dead(nbd->disk);
        nbd_clear_sock(nbd);
-       __invalidate_device(bdev, true);
-       nbd_bdev_reset(nbd);
        if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                               &nbd->config->runtime_flags))
                nbd_config_put(nbd);
@@ -1465,7 +1463,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
        case NBD_DISCONNECT:
                return nbd_disconnect(nbd);
        case NBD_CLEAR_SOCK:
-               nbd_clear_sock_ioctl(nbd, bdev);
+               nbd_clear_sock_ioctl(nbd);
                return 0;
        case NBD_SET_SOCK:
                return nbd_add_socket(nbd, arg, false);
index 635ce06..55c5b48 100644 (file)
@@ -162,21 +162,15 @@ int null_register_zoned_dev(struct nullb *nullb)
        disk_set_zoned(nullb->disk, BLK_ZONED_HM);
        blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
        blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
-
-       if (queue_is_mq(q)) {
-               int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
-
-               if (ret)
-                       return ret;
-       } else {
-               blk_queue_chunk_sectors(q, dev->zone_size_sects);
-               nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
-       }
-
+       blk_queue_chunk_sectors(q, dev->zone_size_sects);
+       nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
        blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
        disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
        disk_set_max_active_zones(nullb->disk, dev->zone_max_active);
 
+       if (queue_is_mq(q))
+               return blk_revalidate_disk_zones(nullb->disk, NULL);
+
        return 0;
 }
 
index bd0e075..2328cc0 100644 (file)
@@ -3675,7 +3675,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
        ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
                            RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
                            RBD_LOCK_TAG, "", 0);
-       if (ret)
+       if (ret && ret != -EEXIST)
                return ret;
 
        __rbd_lock(rbd_dev, cookie);
@@ -3849,51 +3849,82 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
        list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
 }
 
-static int get_lock_owner_info(struct rbd_device *rbd_dev,
-                              struct ceph_locker **lockers, u32 *num_lockers)
+static bool locker_equal(const struct ceph_locker *lhs,
+                        const struct ceph_locker *rhs)
+{
+       return lhs->id.name.type == rhs->id.name.type &&
+              lhs->id.name.num == rhs->id.name.num &&
+              !strcmp(lhs->id.cookie, rhs->id.cookie) &&
+              ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
+}
+
+static void free_locker(struct ceph_locker *locker)
+{
+       if (locker)
+               ceph_free_lockers(locker, 1);
+}
+
+static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
 {
        struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+       struct ceph_locker *lockers;
+       u32 num_lockers;
        u8 lock_type;
        char *lock_tag;
+       u64 handle;
        int ret;
 
-       dout("%s rbd_dev %p\n", __func__, rbd_dev);
-
        ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
                                 &rbd_dev->header_oloc, RBD_LOCK_NAME,
-                                &lock_type, &lock_tag, lockers, num_lockers);
-       if (ret)
-               return ret;
+                                &lock_type, &lock_tag, &lockers, &num_lockers);
+       if (ret) {
+               rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
+               return ERR_PTR(ret);
+       }
 
-       if (*num_lockers == 0) {
+       if (num_lockers == 0) {
                dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
+               lockers = NULL;
                goto out;
        }
 
        if (strcmp(lock_tag, RBD_LOCK_TAG)) {
                rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
                         lock_tag);
-               ret = -EBUSY;
-               goto out;
+               goto err_busy;
        }
 
-       if (lock_type == CEPH_CLS_LOCK_SHARED) {
-               rbd_warn(rbd_dev, "shared lock type detected");
-               ret = -EBUSY;
-               goto out;
+       if (lock_type != CEPH_CLS_LOCK_EXCLUSIVE) {
+               rbd_warn(rbd_dev, "incompatible lock type detected");
+               goto err_busy;
        }
 
-       if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
-                   strlen(RBD_LOCK_COOKIE_PREFIX))) {
+       WARN_ON(num_lockers != 1);
+       ret = sscanf(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu",
+                    &handle);
+       if (ret != 1) {
                rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
-                        (*lockers)[0].id.cookie);
-               ret = -EBUSY;
-               goto out;
+                        lockers[0].id.cookie);
+               goto err_busy;
+       }
+       if (ceph_addr_is_blank(&lockers[0].info.addr)) {
+               rbd_warn(rbd_dev, "locker has a blank address");
+               goto err_busy;
        }
 
+       dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n",
+            __func__, rbd_dev, ENTITY_NAME(lockers[0].id.name),
+            &lockers[0].info.addr.in_addr,
+            le32_to_cpu(lockers[0].info.addr.nonce), handle);
+
 out:
        kfree(lock_tag);
-       return ret;
+       return lockers;
+
+err_busy:
+       kfree(lock_tag);
+       ceph_free_lockers(lockers, num_lockers);
+       return ERR_PTR(-EBUSY);
 }
 
 static int find_watcher(struct rbd_device *rbd_dev,
@@ -3909,8 +3940,10 @@ static int find_watcher(struct rbd_device *rbd_dev,
        ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
                                      &rbd_dev->header_oloc, &watchers,
                                      &num_watchers);
-       if (ret)
+       if (ret) {
+               rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
                return ret;
+       }
 
        sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
        for (i = 0; i < num_watchers; i++) {
@@ -3947,51 +3980,72 @@ out:
 static int rbd_try_lock(struct rbd_device *rbd_dev)
 {
        struct ceph_client *client = rbd_dev->rbd_client->client;
-       struct ceph_locker *lockers;
-       u32 num_lockers;
+       struct ceph_locker *locker, *refreshed_locker;
        int ret;
 
        for (;;) {
+               locker = refreshed_locker = NULL;
+
                ret = rbd_lock(rbd_dev);
-               if (ret != -EBUSY)
-                       return ret;
+               if (!ret)
+                       goto out;
+               if (ret != -EBUSY) {
+                       rbd_warn(rbd_dev, "failed to lock header: %d", ret);
+                       goto out;
+               }
 
                /* determine if the current lock holder is still alive */
-               ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
-               if (ret)
-                       return ret;
-
-               if (num_lockers == 0)
+               locker = get_lock_owner_info(rbd_dev);
+               if (IS_ERR(locker)) {
+                       ret = PTR_ERR(locker);
+                       locker = NULL;
+                       goto out;
+               }
+               if (!locker)
                        goto again;
 
-               ret = find_watcher(rbd_dev, lockers);
+               ret = find_watcher(rbd_dev, locker);
                if (ret)
                        goto out; /* request lock or error */
 
+               refreshed_locker = get_lock_owner_info(rbd_dev);
+               if (IS_ERR(refreshed_locker)) {
+                       ret = PTR_ERR(refreshed_locker);
+                       refreshed_locker = NULL;
+                       goto out;
+               }
+               if (!refreshed_locker ||
+                   !locker_equal(locker, refreshed_locker))
+                       goto again;
+
                rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
-                        ENTITY_NAME(lockers[0].id.name));
+                        ENTITY_NAME(locker->id.name));
 
                ret = ceph_monc_blocklist_add(&client->monc,
-                                             &lockers[0].info.addr);
+                                             &locker->info.addr);
                if (ret) {
-                       rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
-                                ENTITY_NAME(lockers[0].id.name), ret);
+                       rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
+                                ENTITY_NAME(locker->id.name), ret);
                        goto out;
                }
 
                ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
                                          &rbd_dev->header_oloc, RBD_LOCK_NAME,
-                                         lockers[0].id.cookie,
-                                         &lockers[0].id.name);
-               if (ret && ret != -ENOENT)
+                                         locker->id.cookie, &locker->id.name);
+               if (ret && ret != -ENOENT) {
+                       rbd_warn(rbd_dev, "failed to break header lock: %d",
+                                ret);
                        goto out;
+               }
 
 again:
-               ceph_free_lockers(lockers, num_lockers);
+               free_locker(refreshed_locker);
+               free_locker(locker);
        }
 
 out:
-       ceph_free_lockers(lockers, num_lockers);
+       free_locker(refreshed_locker);
+       free_locker(locker);
        return ret;
 }
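
The rbd hunks above convert get_lock_owner_info() from an out-parameter array to a single-pointer return with three outcomes: NULL (nobody holds the lock), ERR_PTR(-EBUSY) (the lookup raced with a lock transition), or a locker the caller must release with free_locker(). rbd_try_lock() also re-fetches the owner after find_watcher() and only breaks the lock when locker_equal() confirms the same entity still holds it. A minimal userspace sketch of the tri-state ERR_PTR convention; MAX_ERRNO and the helpers are re-implemented here purely for illustration (the kernel versions live in <linux/err.h>):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

struct locker { long id; };

/* Mimics get_lock_owner_info(): NULL = lock not held,
 * ERR_PTR(-EBUSY) = lookup raced and should be retried,
 * otherwise a heap-allocated locker the caller must free. */
static struct locker *get_locker(int scenario)
{
	struct locker *l;

	if (scenario == 0)
		return NULL;
	if (scenario == 1)
		return ERR_PTR(-EBUSY);
	l = malloc(sizeof(*l));
	l->id = 4242;
	return l;
}

int main(void)
{
	for (int s = 0; s < 3; s++) {
		struct locker *l = get_locker(s);

		if (IS_ERR(l)) {
			printf("error %ld\n", PTR_ERR(l));
		} else if (!l) {
			printf("no locker\n");
		} else {
			printf("locker %ld\n", l->id);
			free(l);
		}
	}
	return 0;
}

Encoding the errno in the pointer keeps the common NULL check cheap while still letting callers distinguish "no owner" from "lookup failed".
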
 
@@ -4041,11 +4095,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
 
        ret = rbd_try_lock(rbd_dev);
        if (ret < 0) {
-               rbd_warn(rbd_dev, "failed to lock header: %d", ret);
-               if (ret == -EBLOCKLISTED)
-                       goto out;
-
-               ret = 1; /* request lock anyway */
+               rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
+               goto out;
        }
        if (ret > 0) {
                up_write(&rbd_dev->lock_rwsem);
@@ -6579,12 +6630,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
                cancel_delayed_work_sync(&rbd_dev->lock_dwork);
                if (!ret)
                        ret = -ETIMEDOUT;
-       }
 
-       if (ret) {
-               rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
-               return ret;
+               rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
        }
+       if (ret)
+               return ret;
 
        /*
         * The lock may have been released by now, unless automatic lock
index c36d8b1..3988755 100644 (file)
@@ -25,7 +25,7 @@
 
 static struct device *rnbd_dev;
 static const struct class rnbd_dev_class = {
-       .name = "rnbd_client",
+       .name = "rnbd-client",
 };
 static struct kobject *rnbd_devs_kobj;
 
index 1c82375..21d2e71 100644 (file)
@@ -1847,7 +1847,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
        if (ublksrv_pid <= 0)
                return -EINVAL;
 
-       wait_for_completion_interruptible(&ub->completion);
+       if (wait_for_completion_interruptible(&ub->completion) != 0)
+               return -EINTR;
 
        schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
 
@@ -2125,8 +2126,8 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
         * - the device number is freed already, we will not find this
         *   device via ublk_get_device_from_id()
         */
-       wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx));
-
+       if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
+               return -EINTR;
        return 0;
 }
 
@@ -2323,7 +2324,9 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
        pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
                        __func__, ub->dev_info.nr_hw_queues, header->dev_id);
        /* wait until the new ubq_daemon has sent all FETCH_REQs */
-       wait_for_completion_interruptible(&ub->completion);
+       if (wait_for_completion_interruptible(&ub->completion))
+               return -EINTR;
+
        pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n",
                        __func__, ub->dev_info.nr_hw_queues, header->dev_id);
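
The three ublk hunks above stop discarding the result of wait_for_completion_interruptible() and wait_event_interruptible(): if a signal cuts the wait short, the control command now fails with -EINTR instead of proceeding as though the daemon had finished its setup. A rough userspace analogue, assuming POSIX semaphore semantics where a signal handler installed without SA_RESTART makes sem_wait() fail with EINTR:

#include <errno.h>
#include <semaphore.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static sem_t ready;

static void on_alarm(int sig) { (void)sig; }

/* Mirrors the fixed pattern: report the interruption instead of
 * falling through as if the completion had arrived. */
static int wait_ready(void)
{
	if (sem_wait(&ready) != 0 && errno == EINTR)
		return -EINTR;
	return 0;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_alarm };

	sem_init(&ready, 0, 0);
	sigaction(SIGALRM, &sa, NULL);	/* no SA_RESTART: sem_wait fails with EINTR */
	alarm(1);
	printf("wait_ready() = %d\n", wait_ready());	/* -4 (-EINTR) on Linux */
	return 0;
}
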
 
index b47358d..1fe0116 100644 (file)
@@ -751,7 +751,6 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
 {
        u32 v, wg;
        u8 model;
-       int ret;
 
        virtio_cread(vdev, struct virtio_blk_config,
                     zoned.model, &model);
@@ -806,6 +805,7 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
                        vblk->zone_sectors);
                return -ENODEV;
        }
+       blk_queue_chunk_sectors(q, vblk->zone_sectors);
        dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);
 
        if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
@@ -814,26 +814,22 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
                blk_queue_max_discard_sectors(q, 0);
        }
 
-       ret = blk_revalidate_disk_zones(vblk->disk, NULL);
-       if (!ret) {
-               virtio_cread(vdev, struct virtio_blk_config,
-                            zoned.max_append_sectors, &v);
-               if (!v) {
-                       dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
-                       return -ENODEV;
-               }
-               if ((v << SECTOR_SHIFT) < wg) {
-                       dev_err(&vdev->dev,
-                               "write granularity %u exceeds max_append_sectors %u limit\n",
-                               wg, v);
-                       return -ENODEV;
-               }
-
-               blk_queue_max_zone_append_sectors(q, v);
-               dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
+       virtio_cread(vdev, struct virtio_blk_config,
+                    zoned.max_append_sectors, &v);
+       if (!v) {
+               dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
+               return -ENODEV;
+       }
+       if ((v << SECTOR_SHIFT) < wg) {
+               dev_err(&vdev->dev,
+                       "write granularity %u exceeds max_append_sectors %u limit\n",
+                       wg, v);
+               return -ENODEV;
        }
+       blk_queue_max_zone_append_sectors(q, v);
+       dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
 
-       return ret;
+       return blk_revalidate_disk_zones(vblk->disk, NULL);
 }
 
 #else
index 5676e6d..06673c6 100644 (file)
@@ -1870,15 +1870,16 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio)
 
 static void zram_bio_read(struct zram *zram, struct bio *bio)
 {
-       struct bvec_iter iter;
-       struct bio_vec bv;
-       unsigned long start_time;
+       unsigned long start_time = bio_start_io_acct(bio);
+       struct bvec_iter iter = bio->bi_iter;
 
-       start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bv, bio, iter) {
+       do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
+               struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+               bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
                if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_reads);
@@ -1890,22 +1891,26 @@ static void zram_bio_read(struct zram *zram, struct bio *bio)
                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);
-       }
+
+               bio_advance_iter_single(bio, &iter, bv.bv_len);
+       } while (iter.bi_size);
+
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
 }
 
 static void zram_bio_write(struct zram *zram, struct bio *bio)
 {
-       struct bvec_iter iter;
-       struct bio_vec bv;
-       unsigned long start_time;
+       unsigned long start_time = bio_start_io_acct(bio);
+       struct bvec_iter iter = bio->bi_iter;
 
-       start_time = bio_start_io_acct(bio);
-       bio_for_each_segment(bv, bio, iter) {
+       do {
                u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
                u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
                                SECTOR_SHIFT;
+               struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+               bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
 
                if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
                        atomic64_inc(&zram->stats.failed_writes);
@@ -1916,7 +1921,10 @@ static void zram_bio_write(struct zram *zram, struct bio *bio)
                zram_slot_lock(zram, index);
                zram_accessed(zram, index);
                zram_slot_unlock(zram, index);
-       }
+
+               bio_advance_iter_single(bio, &iter, bv.bv_len);
+       } while (iter.bi_size);
+
        bio_end_io_acct(bio, start_time);
        bio_endio(bio);
 }
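
The zram hunks above replace bio_for_each_segment() with a manual iterator loop: each step takes the current bvec with bio_iter_iovec(), clamps its length so it never crosses a zram page boundary, and advances by exactly the clamped amount with bio_advance_iter_single(). That is what lets zram_bvec_read()/zram_bvec_write() assume every bvec they see fits within one compressed page. A small userspace model of the clamping arithmetic (PAGE_SIZE fixed at 4096 for the demo):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096u

/* Walk a byte range and clamp each step so it never crosses a
 * PAGE_SIZE boundary, mirroring
 * bv.bv_len = min(bv.bv_len, PAGE_SIZE - offset). */
static void walk(uint64_t start, uint32_t len)
{
	while (len) {
		uint32_t offset = start % PAGE_SIZE;
		uint32_t step = PAGE_SIZE - offset;

		if (step > len)
			step = len;
		printf("page %llu offset %u len %u\n",
		       (unsigned long long)(start / PAGE_SIZE), offset, step);
		start += step;
		len -= step;
	}
}

int main(void)
{
	walk(4000, 9000);	/* spans four pages */
	return 0;
}
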
index 5ec4ad0..764d176 100644 (file)
@@ -4104,6 +4104,7 @@ static int btusb_probe(struct usb_interface *intf,
        BT_DBG("intf %p id %p", intf, id);
 
        if ((id->driver_info & BTUSB_IFNUM_2) &&
+           (intf->cur_altsetting->desc.bInterfaceNumber != 0) &&
            (intf->cur_altsetting->desc.bInterfaceNumber != 2))
                return -ENODEV;
 
index 21fe985..4cb23b9 100644 (file)
@@ -2142,6 +2142,8 @@ static int sysc_reset(struct sysc *ddata)
                sysc_val = sysc_read_sysconfig(ddata);
                sysc_val |= sysc_mask;
                sysc_write(ddata, sysc_offset, sysc_val);
+               /* Flush posted write */
+               sysc_val = sysc_read_sysconfig(ddata);
        }
 
        if (ddata->cfg.srst_udelay)
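
The sysc hunk above adds a read-back after setting the soft-reset bit: MMIO writes can be posted (buffered in the interconnect), so reading the same register forces the write to reach the device before the srst_udelay that follows. A hedged sketch of the idiom, with volatile accessors standing in for sysc_write()/sysc_read_sysconfig():

#include <stdint.h>
#include <stdio.h>

static inline void reg_write(volatile uint32_t *reg, uint32_t val) { *reg = val; }
static inline uint32_t reg_read(volatile uint32_t *reg) { return *reg; }

static void reset_module(volatile uint32_t *sysconfig, uint32_t srst_mask)
{
	reg_write(sysconfig, reg_read(sysconfig) | srst_mask);
	(void)reg_read(sysconfig);	/* read back: flush the posted write */
}

int main(void)
{
	volatile uint32_t fake_sysconfig = 0x10;	/* made-up register */

	reset_module(&fake_sysconfig, 0x1);
	printf("SYSCONFIG = 0x%x\n", fake_sysconfig);
	return 0;
}
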
index 2d28f55..661574b 100644 (file)
@@ -160,7 +160,7 @@ static struct i2c_driver st33zp24_i2c_driver = {
                .of_match_table = of_match_ptr(of_st33zp24_i2c_match),
                .acpi_match_table = ACPI_PTR(st33zp24_i2c_acpi_match),
        },
-       .probe_new = st33zp24_i2c_probe,
+       .probe = st33zp24_i2c_probe,
        .remove = st33zp24_i2c_remove,
        .id_table = st33zp24_i2c_id
 };
index cd48033..ea6b401 100644 (file)
@@ -510,63 +510,6 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip)
        return 0;
 }
 
-/*
- * Some AMD fTPM versions may cause stutter
- * https://www.amd.com/en/support/kb/faq/pa-410
- *
- * Fixes are available in two series of fTPM firmware:
- * 6.x.y.z series: 6.0.18.6 +
- * 3.x.y.z series: 3.57.y.5 +
- */
-static bool tpm_amd_is_rng_defective(struct tpm_chip *chip)
-{
-       u32 val1, val2;
-       u64 version;
-       int ret;
-
-       if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
-               return false;
-
-       ret = tpm_request_locality(chip);
-       if (ret)
-               return false;
-
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL);
-       if (ret)
-               goto release;
-       if (val1 != 0x414D4400U /* AMD */) {
-               ret = -ENODEV;
-               goto release;
-       }
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL);
-       if (ret)
-               goto release;
-       ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL);
-
-release:
-       tpm_relinquish_locality(chip);
-
-       if (ret)
-               return false;
-
-       version = ((u64)val1 << 32) | val2;
-       if ((version >> 48) == 6) {
-               if (version >= 0x0006000000180006ULL)
-                       return false;
-       } else if ((version >> 48) == 3) {
-               if (version >= 0x0003005700000005ULL)
-                       return false;
-       } else {
-               return false;
-       }
-
-       dev_warn(&chip->dev,
-                "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n",
-                version);
-
-       return true;
-}
-
 static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
        struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng);
@@ -578,10 +521,20 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
        return tpm_get_random(chip, data, max);
 }
 
+static bool tpm_is_hwrng_enabled(struct tpm_chip *chip)
+{
+       if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
+               return false;
+       if (tpm_is_firmware_upgrade(chip))
+               return false;
+       if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED)
+               return false;
+       return true;
+}
+
 static int tpm_add_hwrng(struct tpm_chip *chip)
 {
-       if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) ||
-           tpm_amd_is_rng_defective(chip))
+       if (!tpm_is_hwrng_enabled(chip))
                return 0;
 
        snprintf(chip->hwrng_name, sizeof(chip->hwrng_name),
@@ -686,7 +639,7 @@ int tpm_chip_register(struct tpm_chip *chip)
        return 0;
 
 out_hwrng:
-       if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip))
+       if (tpm_is_hwrng_enabled(chip))
                hwrng_unregister(&chip->hwrng);
 out_ppi:
        tpm_bios_log_teardown(chip);
@@ -711,8 +664,7 @@ EXPORT_SYMBOL_GPL(tpm_chip_register);
 void tpm_chip_unregister(struct tpm_chip *chip)
 {
        tpm_del_legacy_sysfs(chip);
-       if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip) &&
-           !tpm_amd_is_rng_defective(chip))
+       if (tpm_is_hwrng_enabled(chip))
                hwrng_unregister(&chip->hwrng);
        tpm_bios_log_teardown(chip);
        if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
index d43a0d7..9eb1a18 100644 (file)
@@ -463,6 +463,28 @@ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
        return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
 }
 
+static int crb_check_flags(struct tpm_chip *chip)
+{
+       u32 val;
+       int ret;
+
+       ret = crb_request_locality(chip, 0);
+       if (ret)
+               return ret;
+
+       ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
+       if (ret)
+               goto release;
+
+       if (val == 0x414D4400U /* AMD */)
+               chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+
+release:
+       crb_relinquish_locality(chip, 0);
+
+       return ret;
+}
+
 static const struct tpm_class_ops tpm_crb = {
        .flags = TPM_OPS_AUTO_STARTUP,
        .status = crb_status,
@@ -563,15 +585,18 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
        u32 rsp_size;
        int ret;
 
-       INIT_LIST_HEAD(&acpi_resource_list);
-       ret = acpi_dev_get_resources(device, &acpi_resource_list,
-                                    crb_check_resource, iores_array);
-       if (ret < 0)
-               return ret;
-       acpi_dev_free_resource_list(&acpi_resource_list);
-
-       /* Pluton doesn't appear to define ACPI memory regions */
+       /*
+        * Pluton sometimes does not define ACPI memory regions.
+        * Mapping is then done in crb_map_pluton().
+        */
        if (priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+               INIT_LIST_HEAD(&acpi_resource_list);
+               ret = acpi_dev_get_resources(device, &acpi_resource_list,
+                                            crb_check_resource, iores_array);
+               if (ret < 0)
+                       return ret;
+               acpi_dev_free_resource_list(&acpi_resource_list);
+
                if (resource_type(iores_array) != IORESOURCE_MEM) {
                        dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
                        return -EINVAL;
@@ -797,6 +822,14 @@ static int crb_acpi_add(struct acpi_device *device)
        chip->acpi_dev_handle = device->handle;
        chip->flags = TPM_CHIP_FLAG_TPM2;
 
+       rc = tpm_chip_bootstrap(chip);
+       if (rc)
+               goto out;
+
+       rc = crb_check_flags(chip);
+       if (rc)
+               goto out;
+
        rc = tpm_chip_register(chip);
 
 out:
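
Taken together, the two TPM diffs above relocate the AMD fTPM RNG quirk: the core's tpm_amd_is_rng_defective() is dropped, tpm_crb probes the manufacturer in crb_check_flags() (after tpm_chip_bootstrap(), so tpm2_get_tpm_pt() is usable) and sets TPM_CHIP_FLAG_HWRNG_DISABLED, and the core gates both registration and unregistration on the single predicate tpm_is_hwrng_enabled(). A tiny model of why one shared predicate matters, with the chip struct as a stand-in:

#include <stdbool.h>
#include <stdio.h>

/* The three inputs that tpm_is_hwrng_enabled() evaluates. */
struct chip {
	bool config_enabled;	/* IS_ENABLED(CONFIG_HW_RANDOM_TPM) */
	bool fw_upgrade;	/* tpm_is_firmware_upgrade() */
	bool hwrng_disabled;	/* TPM_CHIP_FLAG_HWRNG_DISABLED */
};

static bool hwrng_enabled(const struct chip *c)
{
	return c->config_enabled && !c->fw_upgrade && !c->hwrng_disabled;
}

static void chip_register(const struct chip *c)
{
	if (hwrng_enabled(c))
		puts("hwrng_register");
}

static void chip_unregister(const struct chip *c)
{
	if (hwrng_enabled(c))	/* same predicate: paths cannot disagree */
		puts("hwrng_unregister");
}

int main(void)
{
	struct chip amd = { true, false, true };	/* quirked AMD fTPM */
	struct chip plain = { true, false, false };

	chip_register(&amd);
	chip_unregister(&amd);		/* neither fires */
	chip_register(&plain);
	chip_unregister(&plain);	/* both fire */
	return 0;
}

If register and unregister evaluated separate condition lists, they could disagree and either unregister an hwrng that was never registered or leak one that was.
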
index 8f77154..301a95b 100644 (file)
@@ -203,7 +203,7 @@ static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
 
 static struct i2c_driver i2c_atmel_driver = {
        .id_table = i2c_atmel_id,
-       .probe_new = i2c_atmel_probe,
+       .probe = i2c_atmel_probe,
        .remove = i2c_atmel_remove,
        .driver = {
                .name = I2C_DRIVER_NAME,
index 7cdaff5..81d8a78 100644 (file)
@@ -716,7 +716,7 @@ static void tpm_tis_i2c_remove(struct i2c_client *client)
 
 static struct i2c_driver tpm_tis_i2c_driver = {
        .id_table = tpm_tis_i2c_table,
-       .probe_new = tpm_tis_i2c_probe,
+       .probe = tpm_tis_i2c_probe,
        .remove = tpm_tis_i2c_remove,
        .driver = {
                   .name = "tpm_i2c_infineon",
index a026e98..d7be03c 100644 (file)
@@ -650,7 +650,7 @@ static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
 
 static struct i2c_driver i2c_nuvoton_driver = {
        .id_table = i2c_nuvoton_id,
-       .probe_new = i2c_nuvoton_probe,
+       .probe = i2c_nuvoton_probe,
        .remove = i2c_nuvoton_remove,
        .driver = {
                .name = "tpm_i2c_nuvoton",
index 7db3593..7fa3d91 100644 (file)
@@ -89,7 +89,7 @@ static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
        tpm_tis_flush(iobase);
 }
 
-static int interrupts = -1;
+static int interrupts;
 module_param(interrupts, int, 0444);
 MODULE_PARM_DESC(interrupts, "Enable interrupts");
 
@@ -116,6 +116,22 @@ static int tpm_tis_disable_irq(const struct dmi_system_id *d)
 static const struct dmi_system_id tpm_tis_dmi_table[] = {
        {
                .callback = tpm_tis_disable_irq,
+               .ident = "Framework Laptop (12th Gen Intel Core)",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Laptop (12th Gen Intel Core)"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
+               .ident = "Framework Laptop (13th Gen Intel Core)",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Laptop (13th Gen Intel Core)"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
                .ident = "ThinkPad T490s",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
@@ -140,9 +156,34 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = {
        },
        {
                .callback = tpm_tis_disable_irq,
+               .ident = "ThinkPad L590",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L590"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
+               .ident = "ThinkStation P620",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P620"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
+               .ident = "TUXEDO InfinityBook S 15/17 Gen7",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "TUXEDO InfinityBook S 15/17 Gen7"),
+               },
+       },
+       {
+               .callback = tpm_tis_disable_irq,
                .ident = "UPX-TGL",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "UPX-TGL01"),
                },
        },
        {}
index 558144f..b959630 100644 (file)
 #include <linux/wait.h>
 #include <linux/acpi.h>
 #include <linux/freezer.h>
+#include <linux/dmi.h>
 #include "tpm.h"
 #include "tpm_tis_core.h"
 
+#define TPM_TIS_MAX_UNHANDLED_IRQS     1000
+
 static void tpm_tis_clkrun_enable(struct tpm_chip *chip, bool value);
 
 static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask,
@@ -363,8 +366,13 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
                goto out;
        }
 
-       size += recv_data(chip, &buf[TPM_HEADER_SIZE],
-                         expected - TPM_HEADER_SIZE);
+       rc = recv_data(chip, &buf[TPM_HEADER_SIZE],
+                      expected - TPM_HEADER_SIZE);
+       if (rc < 0) {
+               size = rc;
+               goto out;
+       }
+       size += rc;
        if (size < expected) {
                dev_err(&chip->dev, "Unable to read remainder of result\n");
                size = -ETIME;
@@ -468,25 +476,29 @@ out_err:
        return rc;
 }
 
-static void disable_interrupts(struct tpm_chip *chip)
+static void __tpm_tis_disable_interrupts(struct tpm_chip *chip)
+{
+       struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+       u32 int_mask = 0;
+
+       tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &int_mask);
+       int_mask &= ~TPM_GLOBAL_INT_ENABLE;
+       tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), int_mask);
+
+       chip->flags &= ~TPM_CHIP_FLAG_IRQ;
+}
+
+static void tpm_tis_disable_interrupts(struct tpm_chip *chip)
 {
        struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
-       u32 intmask;
-       int rc;
 
        if (priv->irq == 0)
                return;
 
-       rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
-       if (rc < 0)
-               intmask = 0;
-
-       intmask &= ~TPM_GLOBAL_INT_ENABLE;
-       rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
+       __tpm_tis_disable_interrupts(chip);
 
        devm_free_irq(chip->dev.parent, priv->irq, chip);
        priv->irq = 0;
-       chip->flags &= ~TPM_CHIP_FLAG_IRQ;
 }
 
 /*
@@ -552,7 +564,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
        if (!test_bit(TPM_TIS_IRQ_TESTED, &priv->flags))
                tpm_msleep(1);
        if (!test_bit(TPM_TIS_IRQ_TESTED, &priv->flags))
-               disable_interrupts(chip);
+               tpm_tis_disable_interrupts(chip);
        set_bit(TPM_TIS_IRQ_TESTED, &priv->flags);
        return rc;
 }
@@ -752,6 +764,57 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
        return status == TPM_STS_COMMAND_READY;
 }
 
+static irqreturn_t tpm_tis_revert_interrupts(struct tpm_chip *chip)
+{
+       struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+       const char *product;
+       const char *vendor;
+
+       dev_warn(&chip->dev, FW_BUG
+                "TPM interrupt storm detected, polling instead\n");
+
+       vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+       product = dmi_get_system_info(DMI_PRODUCT_VERSION);
+
+       if (vendor && product) {
+               dev_info(&chip->dev,
+                       "Consider adding the following entry to tpm_tis_dmi_table:\n");
+               dev_info(&chip->dev, "\tDMI_SYS_VENDOR: %s\n", vendor);
+               dev_info(&chip->dev, "\tDMI_PRODUCT_VERSION: %s\n", product);
+       }
+
+       if (tpm_tis_request_locality(chip, 0) != 0)
+               return IRQ_NONE;
+
+       __tpm_tis_disable_interrupts(chip);
+       tpm_tis_relinquish_locality(chip, 0);
+
+       schedule_work(&priv->free_irq_work);
+
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t tpm_tis_update_unhandled_irqs(struct tpm_chip *chip)
+{
+       struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+       irqreturn_t irqret = IRQ_HANDLED;
+
+       if (!(chip->flags & TPM_CHIP_FLAG_IRQ))
+               return IRQ_HANDLED;
+
+       if (time_after(jiffies, priv->last_unhandled_irq + HZ/10))
+               priv->unhandled_irqs = 1;
+       else
+               priv->unhandled_irqs++;
+
+       priv->last_unhandled_irq = jiffies;
+
+       if (priv->unhandled_irqs > TPM_TIS_MAX_UNHANDLED_IRQS)
+               irqret = tpm_tis_revert_interrupts(chip);
+
+       return irqret;
+}
+
 static irqreturn_t tis_int_handler(int dummy, void *dev_id)
 {
        struct tpm_chip *chip = dev_id;
@@ -761,10 +824,10 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
 
        rc = tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
        if (rc < 0)
-               return IRQ_NONE;
+               goto err;
 
        if (interrupt == 0)
-               return IRQ_NONE;
+               goto err;
 
        set_bit(TPM_TIS_IRQ_TESTED, &priv->flags);
        if (interrupt & TPM_INTF_DATA_AVAIL_INT)
@@ -780,10 +843,13 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
        rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), interrupt);
        tpm_tis_relinquish_locality(chip, 0);
        if (rc < 0)
-               return IRQ_NONE;
+               goto err;
 
        tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
        return IRQ_HANDLED;
+
+err:
+       return tpm_tis_update_unhandled_irqs(chip);
 }
 
 static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
@@ -804,6 +870,15 @@ static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
                chip->flags &= ~TPM_CHIP_FLAG_IRQ;
 }
 
+static void tpm_tis_free_irq_func(struct work_struct *work)
+{
+       struct tpm_tis_data *priv = container_of(work, typeof(*priv), free_irq_work);
+       struct tpm_chip *chip = priv->chip;
+
+       devm_free_irq(chip->dev.parent, priv->irq, chip);
+       priv->irq = 0;
+}
+
 /* Register the IRQ and issue a command that will cause an interrupt. If an
  * irq is seen then leave the chip setup for IRQ operation, otherwise reverse
  * everything and leave in polling mode. Returns 0 on success.
@@ -816,6 +891,7 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask,
        int rc;
        u32 int_status;
 
+       INIT_WORK(&priv->free_irq_work, tpm_tis_free_irq_func);
 
        rc = devm_request_threaded_irq(chip->dev.parent, irq, NULL,
                                       tis_int_handler, IRQF_ONESHOT | flags,
@@ -918,6 +994,7 @@ void tpm_tis_remove(struct tpm_chip *chip)
                interrupt = 0;
 
        tpm_tis_write32(priv, reg, ~TPM_GLOBAL_INT_ENABLE & interrupt);
+       flush_work(&priv->free_irq_work);
 
        tpm_tis_clkrun_enable(chip, false);
 
@@ -1021,6 +1098,7 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
        chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX);
        chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX);
        chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX);
+       priv->chip = chip;
        priv->timeout_min = TPM_TIMEOUT_USECS_MIN;
        priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
        priv->phy_ops = phy_ops;
@@ -1179,7 +1257,7 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
                        rc = tpm_tis_request_locality(chip, 0);
                        if (rc < 0)
                                goto out_err;
-                       disable_interrupts(chip);
+                       tpm_tis_disable_interrupts(chip);
                        tpm_tis_relinquish_locality(chip, 0);
                }
        }
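
The tpm_tis hunks above add an interrupt-storm escape hatch: every IRQ the handler cannot attribute to the TPM bumps a counter that restarts after a quiet period of HZ/10, and once more than TPM_TIS_MAX_UNHANDLED_IRQS arrive back-to-back the driver masks the interrupt and reverts to polling. The actual devm_free_irq() is deferred to free_irq_work because freeing an IRQ waits for its handler to finish and so cannot be called from the handler itself. A userspace model of the windowed counter, with the caller supplying the tick instead of the kernel's jiffies:

#include <stdbool.h>
#include <stdio.h>

#define WINDOW		10	/* ticks; stands in for HZ/10 */
#define MAX_UNHANDLED	1000	/* stands in for TPM_TIS_MAX_UNHANDLED_IRQS */

struct storm_state {
	unsigned long last;	/* tick of the previous unhandled IRQ */
	unsigned int count;
};

/* Returns true once unhandled IRQs arrive faster than one per WINDOW
 * for more than MAX_UNHANDLED events in a row, mirroring
 * tpm_tis_update_unhandled_irqs(). */
static bool unhandled_irq(struct storm_state *s, unsigned long now)
{
	if (now > s->last + WINDOW)
		s->count = 1;	/* quiet period: restart the count */
	else
		s->count++;
	s->last = now;
	return s->count > MAX_UNHANDLED;
}

int main(void)
{
	struct storm_state s = { 0, 0 };

	for (unsigned long tick = 1; tick <= 2000; tick++) {
		if (unhandled_irq(&s, tick)) {
			printf("storm detected at tick %lu\n", tick);
			break;
		}
	}
	return 0;
}
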
index 610bfad..b1a169d 100644 (file)
@@ -91,11 +91,15 @@ enum tpm_tis_flags {
 };
 
 struct tpm_tis_data {
+       struct tpm_chip *chip;
        u16 manufacturer_id;
        struct mutex locality_count_mutex;
        unsigned int locality_count;
        int locality;
        int irq;
+       struct work_struct free_irq_work;
+       unsigned long last_unhandled_irq;
+       unsigned int unhandled_irqs;
        unsigned int int_mask;
        unsigned long flags;
        void __iomem *ilb_base_addr;
index c8c34ad..a897402 100644 (file)
@@ -189,21 +189,28 @@ static int tpm_tis_i2c_read_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
        int ret;
 
        for (i = 0; i < TPM_RETRY; i++) {
-               /* write register */
-               msg.len = sizeof(reg);
-               msg.buf = &reg;
-               msg.flags = 0;
-               ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
-               if (ret < 0)
-                       return ret;
-
-               /* read data */
-               msg.buf = result;
-               msg.len = len;
-               msg.flags = I2C_M_RD;
-               ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
-               if (ret < 0)
-                       return ret;
+               u16 read = 0;
+
+               while (read < len) {
+                       /* write register */
+                       msg.len = sizeof(reg);
+                       msg.buf = &reg;
+                       msg.flags = 0;
+                       ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
+                       if (ret < 0)
+                               return ret;
+
+                       /* read data */
+                       msg.buf = result + read;
+                       msg.len = len - read;
+                       msg.flags = I2C_M_RD;
+                       if (msg.len > I2C_SMBUS_BLOCK_MAX)
+                               msg.len = I2C_SMBUS_BLOCK_MAX;
+                       ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
+                       if (ret < 0)
+                               return ret;
+                       read += msg.len;
+               }
 
                ret = tpm_tis_i2c_sanity_check_read(reg, len, result);
                if (ret == 0)
@@ -223,19 +230,27 @@ static int tpm_tis_i2c_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
        struct i2c_msg msg = { .addr = phy->i2c_client->addr };
        u8 reg = tpm_tis_i2c_address_to_register(addr);
        int ret;
+       u16 wrote = 0;
 
        if (len > TPM_BUFSIZE - 1)
                return -EIO;
 
-       /* write register and data in one go */
        phy->io_buf[0] = reg;
-       memcpy(phy->io_buf + sizeof(reg), value, len);
-
-       msg.len = sizeof(reg) + len;
        msg.buf = phy->io_buf;
-       ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
-       if (ret < 0)
-               return ret;
+       while (wrote < len) {
+               /* write register and data in one go */
+               msg.len = sizeof(reg) + len - wrote;
+               if (msg.len > I2C_SMBUS_BLOCK_MAX)
+                       msg.len = I2C_SMBUS_BLOCK_MAX;
+
+               memcpy(phy->io_buf + sizeof(reg), value + wrote,
+                      msg.len - sizeof(reg));
+
+               ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
+               if (ret < 0)
+                       return ret;
+               wrote += msg.len - sizeof(reg);
+       }
 
        return 0;
 }
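
The tpm_tis_i2c hunks above split large transfers into chunks of at most I2C_SMBUS_BLOCK_MAX bytes: reads re-issue the one-byte register address before each chunk, and writes prepend it to every message, so each message carries at most I2C_SMBUS_BLOCK_MAX - 1 payload bytes. A userspace model of the write-side arithmetic (BLOCK_MAX standing in for I2C_SMBUS_BLOCK_MAX; no real I2C transfer is issued):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLOCK_MAX 32	/* stands in for I2C_SMBUS_BLOCK_MAX */

/* Every message carries the 1-byte register address plus at most
 * BLOCK_MAX - 1 payload bytes, as in the write loop above. */
static void chunked_write(uint8_t reg, const uint8_t *buf, uint16_t len)
{
	uint16_t wrote = 0;

	while (wrote < len) {
		uint16_t msg_len = 1 + (uint16_t)(len - wrote);

		if (msg_len > BLOCK_MAX)
			msg_len = BLOCK_MAX;
		/* a real driver would copy buf + wrote behind the
		 * register byte and issue the transfer here */
		printf("msg: reg=0x%02x payload=%u bytes\n", reg, msg_len - 1);
		wrote += msg_len - 1;
	}
	(void)buf;
}

int main(void)
{
	uint8_t buf[100];

	memset(buf, 0xab, sizeof(buf));
	chunked_write(0x24, buf, sizeof(buf));	/* 4 msgs: 31+31+31+7 */
	return 0;
}
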
@@ -379,7 +394,7 @@ static struct i2c_driver tpm_tis_i2c_driver = {
                .pm = &tpm_tis_pm,
                .of_match_table = of_match_ptr(of_tis_i2c_match),
        },
-       .probe_new = tpm_tis_i2c_probe,
+       .probe = tpm_tis_i2c_probe,
        .remove = tpm_tis_i2c_remove,
        .id_table = tpm_tis_i2c_id,
 };
index 376ae18..e70abd6 100644 (file)
@@ -779,7 +779,7 @@ static void tpm_cr50_i2c_remove(struct i2c_client *client)
 static SIMPLE_DEV_PM_OPS(cr50_i2c_pm, tpm_pm_suspend, tpm_pm_resume);
 
 static struct i2c_driver cr50_i2c_driver = {
-       .probe_new = tpm_cr50_i2c_probe,
+       .probe = tpm_cr50_i2c_probe,
        .remove = tpm_cr50_i2c_remove,
        .driver = {
                .name = "cr50_i2c",
index 1f52079..9bfaba0 100644 (file)
@@ -136,6 +136,14 @@ int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
        }
 
 exit:
+       if (ret < 0) {
+               /* Deactivate chip select */
+               memset(&spi_xfer, 0, sizeof(spi_xfer));
+               spi_message_init(&m);
+               spi_message_add_tail(&spi_xfer, &m);
+               spi_sync_locked(phy->spi_device, &m);
+       }
+
        spi_bus_unlock(phy->spi_device->master);
        return ret;
 }
index 5c86598..30e9539 100644 (file)
@@ -683,37 +683,21 @@ static struct miscdevice vtpmx_miscdev = {
        .fops = &vtpmx_fops,
 };
 
-static int vtpmx_init(void)
-{
-       return misc_register(&vtpmx_miscdev);
-}
-
-static void vtpmx_cleanup(void)
-{
-       misc_deregister(&vtpmx_miscdev);
-}
-
 static int __init vtpm_module_init(void)
 {
        int rc;
 
-       rc = vtpmx_init();
-       if (rc) {
-               pr_err("couldn't create vtpmx device\n");
-               return rc;
-       }
-
        workqueue = create_workqueue("tpm-vtpm");
        if (!workqueue) {
                pr_err("couldn't create workqueue\n");
-               rc = -ENOMEM;
-               goto err_vtpmx_cleanup;
+               return -ENOMEM;
        }
 
-       return 0;
-
-err_vtpmx_cleanup:
-       vtpmx_cleanup();
+       rc = misc_register(&vtpmx_miscdev);
+       if (rc) {
+               pr_err("couldn't create vtpmx device\n");
+               destroy_workqueue(workqueue);
+       }
 
        return rc;
 }
@@ -721,7 +705,7 @@ err_vtpmx_cleanup:
 static void __exit vtpm_module_exit(void)
 {
        destroy_workqueue(workqueue);
-       vtpmx_cleanup();
+       misc_deregister(&vtpmx_miscdev);
 }
 
 module_init(vtpm_module_init);
index 93f38a8..6b3b424 100644 (file)
@@ -444,6 +444,7 @@ config COMMON_CLK_BD718XX
 config COMMON_CLK_FIXED_MMIO
        bool "Clock driver for Memory Mapped Fixed values"
        depends on COMMON_CLK && OF
+       depends on HAS_IOMEM
        help
          Support for Memory Mapped IO Fixed clocks
 
index 4fb4fd4..737aa70 100644 (file)
@@ -205,18 +205,19 @@ EXPORT_SYMBOL(devm_clk_put);
 struct clk *devm_get_clk_from_child(struct device *dev,
                                    struct device_node *np, const char *con_id)
 {
-       struct clk **ptr, *clk;
+       struct devm_clk_state *state;
+       struct clk *clk;
 
-       ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
-       if (!ptr)
+       state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
+       if (!state)
                return ERR_PTR(-ENOMEM);
 
        clk = of_clk_get_by_name(np, con_id);
        if (!IS_ERR(clk)) {
-               *ptr = clk;
-               devres_add(dev, ptr);
+               state->clk = clk;
+               devres_add(dev, state);
        } else {
-               devres_free(ptr);
+               devres_free(state);
        }
 
        return clk;
index b6c7c27..44f4351 100644 (file)
@@ -291,7 +291,7 @@ static int imx93_clocks_probe(struct platform_device *pdev)
        anatop_base = devm_of_iomap(dev, np, 0, NULL);
        of_node_put(np);
        if (WARN_ON(IS_ERR(anatop_base))) {
-               ret = PTR_ERR(base);
+               ret = PTR_ERR(anatop_base);
                goto unregister_hws;
        }
 
index d33f741..935d9a2 100644 (file)
@@ -151,8 +151,10 @@ static int ti_syscon_gate_clk_probe(struct platform_device *pdev)
                                 data[i].name);
        }
 
-       return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
-                                          hw_data);
+       if (num_clks == 1)
+               return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get,
+                                                  hw_data->hws[0]);
+       return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, hw_data);
 }
 
 #define TI_SYSCON_CLK_GATE(_name, _offset, _bit_idx)   \
index 1ba421b..e31f943 100644 (file)
@@ -328,6 +328,14 @@ static const char * const atb_parents[] = {
        "syspll_d5"
 };
 
+static const char * const sspm_parents[] = {
+       "clk26m",
+       "univpll_d2_d4",
+       "syspll_d2_d2",
+       "univpll_d2_d2",
+       "syspll_d3"
+};
+
 static const char * const dpi0_parents[] = {
        "clk26m",
        "tvdpll_d2",
@@ -507,6 +515,9 @@ static const struct mtk_mux top_muxes[] = {
        /* CLK_CFG_6 */
        MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_ATB, "atb_sel",
                atb_parents, 0xa0, 0xa4, 0xa8, 0, 2, 7, 0x004, 24),
+       MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_MUX_SSPM, "sspm_sel",
+                                  sspm_parents, 0xa0, 0xa4, 0xa8, 8, 3, 15, 0x004, 25,
+                                  CLK_IS_CRITICAL | CLK_SET_RATE_PARENT),
        MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_DPI0, "dpi0_sel",
                dpi0_parents, 0xa0, 0xa4, 0xa8, 16, 4, 23, 0x004, 26),
        MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_SCAM, "scam_sel",
@@ -673,10 +684,18 @@ static const struct mtk_gate_regs infra3_cg_regs = {
        GATE_MTK(_id, _name, _parent, &infra2_cg_regs, _shift,  \
                &mtk_clk_gate_ops_setclr)
 
+#define GATE_INFRA2_FLAGS(_id, _name, _parent, _shift, _flag)  \
+       GATE_MTK_FLAGS(_id, _name, _parent, &infra2_cg_regs,    \
+                      _shift, &mtk_clk_gate_ops_setclr, _flag)
+
 #define GATE_INFRA3(_id, _name, _parent, _shift)               \
        GATE_MTK(_id, _name, _parent, &infra3_cg_regs, _shift,  \
                &mtk_clk_gate_ops_setclr)
 
+#define GATE_INFRA3_FLAGS(_id, _name, _parent, _shift, _flag)  \
+       GATE_MTK_FLAGS(_id, _name, _parent, &infra3_cg_regs,    \
+                      _shift, &mtk_clk_gate_ops_setclr, _flag)
+
 static const struct mtk_gate infra_clks[] = {
        /* INFRA0 */
        GATE_INFRA0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr", "axi_sel", 0),
@@ -748,7 +767,11 @@ static const struct mtk_gate infra_clks[] = {
        GATE_INFRA2(CLK_INFRA_UNIPRO_TICK, "infra_unipro_tick", "fufs_sel", 12),
        GATE_INFRA2(CLK_INFRA_UFS_MP_SAP_BCLK, "infra_ufs_mp_sap_bck", "fufs_sel", 13),
        GATE_INFRA2(CLK_INFRA_MD32_BCLK, "infra_md32_bclk", "axi_sel", 14),
+       /* infra_sspm is the co-processor's main clock and must not be gated by Linux. */
+       GATE_INFRA2_FLAGS(CLK_INFRA_SSPM, "infra_sspm", "sspm_sel", 15, CLK_IS_CRITICAL),
        GATE_INFRA2(CLK_INFRA_UNIPRO_MBIST, "infra_unipro_mbist", "axi_sel", 16),
+       /* infra_sspm_bus_hclk is the co-processor's main clock and must not be gated by Linux. */
+       GATE_INFRA2_FLAGS(CLK_INFRA_SSPM_BUS_HCLK, "infra_sspm_bus_hclk", "axi_sel", 17, CLK_IS_CRITICAL),
        GATE_INFRA2(CLK_INFRA_I2C5, "infra_i2c5", "i2c_sel", 18),
        GATE_INFRA2(CLK_INFRA_I2C5_ARBITER, "infra_i2c5_arbiter", "i2c_sel", 19),
        GATE_INFRA2(CLK_INFRA_I2C5_IMM, "infra_i2c5_imm", "i2c_sel", 20),
@@ -766,6 +789,10 @@ static const struct mtk_gate infra_clks[] = {
        GATE_INFRA3(CLK_INFRA_MSDC0_SELF, "infra_msdc0_self", "msdc50_0_sel", 0),
        GATE_INFRA3(CLK_INFRA_MSDC1_SELF, "infra_msdc1_self", "msdc50_0_sel", 1),
        GATE_INFRA3(CLK_INFRA_MSDC2_SELF, "infra_msdc2_self", "msdc50_0_sel", 2),
+       /* infra_sspm_26m_self is the co-processor's main clock and must not be gated by Linux. */
+       GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_26M_SELF, "infra_sspm_26m_self", "f_f26m_ck", 3, CLK_IS_CRITICAL),
+       /* infra_sspm_32k_self is the co-processor's main clock and must not be gated by Linux. */
+       GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_32K_SELF, "infra_sspm_32k_self", "f_f26m_ck", 4, CLK_IS_CRITICAL),
        GATE_INFRA3(CLK_INFRA_UFS_AXI, "infra_ufs_axi", "axi_sel", 5),
        GATE_INFRA3(CLK_INFRA_I2C6, "infra_i2c6", "i2c_sel", 6),
        GATE_INFRA3(CLK_INFRA_AP_MSDC0, "infra_ap_msdc0", "msdc50_hclk_sel", 7),
index 8fef90b..6fa7639 100644 (file)
@@ -367,9 +367,9 @@ static int meson_clk_pll_enable(struct clk_hw *hw)
         * 3. enable the lock detect module
         */
        if (MESON_PARM_APPLICABLE(&pll->current_en)) {
-               usleep_range(10, 20);
+               udelay(10);
                meson_parm_write(clk->map, &pll->current_en, 1);
-               usleep_range(40, 50);
+               udelay(40);
        }
 
        if (MESON_PARM_APPLICABLE(&pll->l_detect)) {
index bca21df..62962ae 100644 (file)
@@ -3,13 +3,6 @@
 # Counter devices
 #
 
-menuconfig COUNTER
-       tristate "Counter support"
-       help
-         This enables counter device support through the Generic Counter
-         interface. You only need to enable this, if you also want to enable
-         one or more of the counter device drivers below.
-
 config I8254
        tristate
        select COUNTER
@@ -25,6 +18,13 @@ config I8254
 
          If built as a module its name will be i8254.
 
+menuconfig COUNTER
+       tristate "Counter support"
+       help
+         This enables counter device support through the Generic Counter
+         interface. You only need to enable this, if you also want to enable
+         one or more of the counter device drivers below.
+
 if COUNTER
 
 config 104_QUAD_8
index b2f05d2..37f1cdf 100644 (file)
@@ -1011,22 +1011,20 @@ static int __init acpi_cpufreq_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int acpi_cpufreq_remove(struct platform_device *pdev)
+static void acpi_cpufreq_remove(struct platform_device *pdev)
 {
        pr_debug("%s\n", __func__);
 
        cpufreq_unregister_driver(&acpi_cpufreq_driver);
 
        free_acpi_perf_data();
-
-       return 0;
 }
 
 static struct platform_driver acpi_cpufreq_platdrv = {
        .driver = {
                .name   = "acpi-cpufreq",
        },
-       .remove         = acpi_cpufreq_remove,
+       .remove_new     = acpi_cpufreq_remove,
 };
 
 static int __init acpi_cpufreq_init(void)
index 7f3fe20..f04ae67 100644 (file)
@@ -64,27 +64,9 @@ static struct amd_pstate_ut_struct amd_pstate_ut_cases[] = {
 static bool get_shared_mem(void)
 {
        bool result = false;
-       char path[] = "/sys/module/amd_pstate/parameters/shared_mem";
-       char buf[5] = {0};
-       struct file *filp = NULL;
-       loff_t pos = 0;
-       ssize_t ret;
-
-       if (!boot_cpu_has(X86_FEATURE_CPPC)) {
-               filp = filp_open(path, O_RDONLY, 0);
-               if (IS_ERR(filp))
-                       pr_err("%s unable to open %s file!\n", __func__, path);
-               else {
-                       ret = kernel_read(filp, &buf, sizeof(buf), &pos);
-                       if (ret < 0)
-                               pr_err("%s read %s file fail ret=%ld!\n",
-                                       __func__, path, (long)ret);
-                       filp_close(filp, NULL);
-               }
 
-               if ('Y' == *buf)
-                       result = true;
-       }
+       if (!boot_cpu_has(X86_FEATURE_CPPC))
+               result = true;
 
        return result;
 }
@@ -145,8 +127,6 @@ static void amd_pstate_ut_check_perf(u32 index)
        struct cpufreq_policy *policy = NULL;
        struct amd_cpudata *cpudata = NULL;
 
-       highest_perf = amd_get_highest_perf();
-
        for_each_possible_cpu(cpu) {
                policy = cpufreq_cpu_get(cpu);
                if (!policy)
@@ -158,9 +138,10 @@ static void amd_pstate_ut_check_perf(u32 index)
                        if (ret) {
                                amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
                                pr_err("%s cppc_get_perf_caps ret=%d error!\n", __func__, ret);
-                               return;
+                               goto skip_test;
                        }
 
+                       highest_perf = cppc_perf.highest_perf;
                        nominal_perf = cppc_perf.nominal_perf;
                        lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
                        lowest_perf = cppc_perf.lowest_perf;
@@ -169,9 +150,10 @@ static void amd_pstate_ut_check_perf(u32 index)
                        if (ret) {
                                amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
                                pr_err("%s read CPPC_CAP1 ret=%d error!\n", __func__, ret);
-                               return;
+                               goto skip_test;
                        }
 
+                       highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
                        nominal_perf = AMD_CPPC_NOMINAL_PERF(cap1);
                        lowest_nonlinear_perf = AMD_CPPC_LOWNONLIN_PERF(cap1);
                        lowest_perf = AMD_CPPC_LOWEST_PERF(cap1);
@@ -187,7 +169,7 @@ static void amd_pstate_ut_check_perf(u32 index)
                                nominal_perf, cpudata->nominal_perf,
                                lowest_nonlinear_perf, cpudata->lowest_nonlinear_perf,
                                lowest_perf, cpudata->lowest_perf);
-                       return;
+                       goto skip_test;
                }
 
                if (!((highest_perf >= nominal_perf) &&
@@ -198,11 +180,15 @@ static void amd_pstate_ut_check_perf(u32 index)
                        pr_err("%s cpu%d highest=%d >= nominal=%d > lowest_nonlinear=%d > lowest=%d > 0, the formula is incorrect!\n",
                                __func__, cpu, highest_perf, nominal_perf,
                                lowest_nonlinear_perf, lowest_perf);
-                       return;
+                       goto skip_test;
                }
+               cpufreq_cpu_put(policy);
        }
 
        amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+       return;
+skip_test:
+       cpufreq_cpu_put(policy);
 }
 
 /*
@@ -230,14 +216,14 @@ static void amd_pstate_ut_check_freq(u32 index)
                        pr_err("%s cpu%d max=%d >= nominal=%d > lowest_nonlinear=%d > min=%d > 0, the formula is incorrect!\n",
                                __func__, cpu, cpudata->max_freq, cpudata->nominal_freq,
                                cpudata->lowest_nonlinear_freq, cpudata->min_freq);
-                       return;
+                       goto skip_test;
                }
 
                if (cpudata->min_freq != policy->min) {
                        amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
                        pr_err("%s cpu%d cpudata_min_freq=%d policy_min=%d, they should be equal!\n",
                                __func__, cpu, cpudata->min_freq, policy->min);
-                       return;
+                       goto skip_test;
                }
 
                if (cpudata->boost_supported) {
@@ -249,16 +235,20 @@ static void amd_pstate_ut_check_freq(u32 index)
                                pr_err("%s cpu%d policy_max=%d should be equal cpu_max=%d or cpu_nominal=%d !\n",
                                        __func__, cpu, policy->max, cpudata->max_freq,
                                        cpudata->nominal_freq);
-                               return;
+                               goto skip_test;
                        }
                } else {
                        amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_FAIL;
                        pr_err("%s cpu%d must support boost!\n", __func__, cpu);
-                       return;
+                       goto skip_test;
                }
+               cpufreq_cpu_put(policy);
        }
 
        amd_pstate_ut_cases[index].result = AMD_PSTATE_UT_RESULT_PASS;
+       return;
+skip_test:
+       cpufreq_cpu_put(policy);
 }
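
The amd-pstate-ut hunks above fix a reference leak: cpufreq_cpu_get() takes a reference on the policy, and every early return inside the per-CPU loop used to skip the matching cpufreq_cpu_put(). Routing all failure exits through skip_test guarantees the put. A toy model of the balanced get/put discipline:

#include <stdio.h>

static int refs;	/* models the policy's reference count */

static int get_policy(void)  { refs++; return 1; }
static void put_policy(void) { refs--; }

/* Mirrors the fixed control flow: every failure path now reaches
 * skip_test so the reference is always dropped. */
static void check(int fail)
{
	if (!get_policy())
		return;
	if (fail)
		goto skip_test;	/* was a bare return, leaking the ref */
	puts("check passed");
skip_test:
	put_policy();
}

int main(void)
{
	check(1);
	check(0);
	printf("leaked refs: %d\n", refs);	/* 0 */
	return 0;
}
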
 
 static int __init amd_pstate_ut_init(void)
index 81fba0d..9a1e194 100644 (file)
@@ -1012,8 +1012,8 @@ static int amd_pstate_update_status(const char *buf, size_t size)
        return 0;
 }
 
-static ssize_t show_status(struct kobject *kobj,
-                          struct kobj_attribute *attr, char *buf)
+static ssize_t status_show(struct device *dev,
+                          struct device_attribute *attr, char *buf)
 {
        ssize_t ret;
 
@@ -1024,7 +1024,7 @@ static ssize_t show_status(struct kobject *kobj,
        return ret;
 }
 
-static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+static ssize_t status_store(struct device *a, struct device_attribute *b,
                            const char *buf, size_t count)
 {
        char *p = memchr(buf, '\n', count);
@@ -1043,7 +1043,7 @@ cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
 cpufreq_freq_attr_ro(amd_pstate_highest_perf);
 cpufreq_freq_attr_rw(energy_performance_preference);
 cpufreq_freq_attr_ro(energy_performance_available_preferences);
-define_one_global_rw(status);
+static DEVICE_ATTR_RW(status);
 
 static struct freq_attr *amd_pstate_attr[] = {
        &amd_pstate_max_freq,
@@ -1062,7 +1062,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
 };
 
 static struct attribute *pstate_global_attributes[] = {
-       &status.attr,
+       &dev_attr_status.attr,
        NULL
 };
 
index b74289a..bea41cc 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regmap.h>
index ffea640..35fb3a5 100644 (file)
@@ -434,7 +434,11 @@ brcm_avs_get_freq_table(struct device *dev, struct private_data *priv)
        if (ret)
                return ERR_PTR(ret);
 
-       table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1, sizeof(*table),
+       /*
+        * We allocate space for the 5 different AVS P-states,
+        * plus extra space for a terminating element.
+        */
+       table = devm_kcalloc(dev, AVS_PSTATE_MAX + 1 + 1, sizeof(*table),
                             GFP_KERNEL);
        if (!table)
                return ERR_PTR(-ENOMEM);
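
The brcm_avs hunk above fixes an off-by-one: cpufreq frequency tables end with a sentinel entry, so the driver needs AVS_PSTATE_MAX + 1 slots for states 0..AVS_PSTATE_MAX plus one more for the terminator. A sketch with stand-in constants (TABLE_END playing the role of CPUFREQ_TABLE_END, PSTATE_MAX assumed to be 4, frequencies made up):

#include <stdio.h>

#define TABLE_END	0xffffffffu	/* stands in for CPUFREQ_TABLE_END */
#define PSTATE_MAX	4		/* assumed value of AVS_PSTATE_MAX */

int main(void)
{
	/* states 0..PSTATE_MAX inclusive, plus one sentinel slot */
	unsigned int table[PSTATE_MAX + 1 + 1];
	unsigned int i;

	for (i = 0; i <= PSTATE_MAX; i++)
		table[i] = 100000u * (i + 1);
	table[i] = TABLE_END;

	for (i = 0; table[i] != TABLE_END; i++)
		printf("pstate %u: %u kHz\n", i, table[i]);
	return 0;
}
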
@@ -749,13 +753,11 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int brcm_avs_cpufreq_remove(struct platform_device *pdev)
+static void brcm_avs_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&brcm_avs_driver);
 
        brcm_avs_prepare_uninit(pdev);
-
-       return 0;
 }
 
 static const struct of_device_id brcm_avs_cpufreq_match[] = {
@@ -770,7 +772,7 @@ static struct platform_driver brcm_avs_cpufreq_platdrv = {
                .of_match_table = brcm_avs_cpufreq_match,
        },
        .probe          = brcm_avs_cpufreq_probe,
-       .remove         = brcm_avs_cpufreq_remove,
+       .remove_new     = brcm_avs_cpufreq_remove,
 };
 module_platform_driver(brcm_avs_cpufreq_platdrv);
 
index 022e355..fe08ca4 100644 (file)
@@ -249,15 +249,19 @@ static void __init cppc_freq_invariance_init(void)
                return;
 
        kworker_fie = kthread_create_worker(0, "cppc_fie");
-       if (IS_ERR(kworker_fie))
+       if (IS_ERR(kworker_fie)) {
+               pr_warn("%s: failed to create kworker_fie: %ld\n", __func__,
+                       PTR_ERR(kworker_fie));
+               fie_disabled = FIE_DISABLED;
                return;
+       }
 
        ret = sched_setattr_nocheck(kworker_fie->task, &attr);
        if (ret) {
                pr_warn("%s: failed to set SCHED_DEADLINE: %d\n", __func__,
                        ret);
                kthread_destroy_worker(kworker_fie);
-               return;
+               fie_disabled = FIE_DISABLED;
        }
 }
 
@@ -267,7 +271,6 @@ static void cppc_freq_invariance_exit(void)
                return;
 
        kthread_destroy_worker(kworker_fie);
-       kworker_fie = NULL;
 }
 
 #else
@@ -849,13 +852,13 @@ static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 
        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
        if (ret)
-               return ret;
+               return 0;
 
        udelay(2); /* 2usec delay between sampling */
 
        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
        if (ret)
-               return ret;
+               return 0;
 
        delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
                                               &fb_ctrs_t1);
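
The cppc hunk above returns 0 rather than ret on failure because cpufreq ->get() callbacks return an unsigned frequency in kHz: a negative errno would wrap to an enormous bogus value, whereas 0 conventionally means "unknown". A two-line demonstration of the wraparound:

#include <stdio.h>

/* Returning a negative errno from an unsigned-returning function
 * (the old behaviour) yields a huge bogus frequency. */
static unsigned int get_rate(int err)
{
	if (err)
		return (unsigned int)err;	/* old bug: -5 becomes 4294967291 */
	return 1200000;
}

int main(void)
{
	printf("%u\n", get_rate(-5));	/* 4294967291, not an error code */
	printf("%u\n", get_rate(0));	/* 1200000 */
	return 0;
}
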
index e2b2008..fb2875c 100644 (file)
@@ -143,14 +143,19 @@ static const struct of_device_id blocklist[] __initconst = {
 
        { .compatible = "qcom,apq8096", },
        { .compatible = "qcom,msm8996", },
+       { .compatible = "qcom,msm8998", },
+       { .compatible = "qcom,qcm2290", },
        { .compatible = "qcom,qcs404", },
+       { .compatible = "qcom,qdu1000", },
        { .compatible = "qcom,sa8155p" },
        { .compatible = "qcom,sa8540p" },
+       { .compatible = "qcom,sa8775p" },
        { .compatible = "qcom,sc7180", },
        { .compatible = "qcom,sc7280", },
        { .compatible = "qcom,sc8180x", },
        { .compatible = "qcom,sc8280xp", },
        { .compatible = "qcom,sdm845", },
+       { .compatible = "qcom,sdx75", },
        { .compatible = "qcom,sm6115", },
        { .compatible = "qcom,sm6350", },
        { .compatible = "qcom,sm6375", },
@@ -158,6 +163,8 @@ static const struct of_device_id blocklist[] __initconst = {
        { .compatible = "qcom,sm8150", },
        { .compatible = "qcom,sm8250", },
        { .compatible = "qcom,sm8350", },
+       { .compatible = "qcom,sm8450", },
+       { .compatible = "qcom,sm8550", },
 
        { .compatible = "st,stih407", },
        { .compatible = "st,stih410", },
index 4aec4b2..8bd6e5e 100644 (file)
@@ -349,11 +349,10 @@ err:
        return ret;
 }
 
-static int dt_cpufreq_remove(struct platform_device *pdev)
+static void dt_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&dt_cpufreq_driver);
        dt_cpufreq_release();
-       return 0;
 }
 
 static struct platform_driver dt_cpufreq_platdrv = {
@@ -361,7 +360,7 @@ static struct platform_driver dt_cpufreq_platdrv = {
                .name   = "cpufreq-dt",
        },
        .probe          = dt_cpufreq_probe,
-       .remove         = dt_cpufreq_remove,
+       .remove_new     = dt_cpufreq_remove,
 };
 module_platform_driver(dt_cpufreq_platdrv);
 
index 50bbc96..a757f90 100644 (file)
@@ -1234,16 +1234,16 @@ static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
        ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MIN,
                                    &policy->nb_min);
        if (ret) {
-               dev_err(dev, "Failed to register MIN QoS notifier: %d (%*pbl)\n",
-                       ret, cpumask_pr_args(policy->cpus));
+               dev_err(dev, "Failed to register MIN QoS notifier: %d (CPU%u)\n",
+                       ret, cpu);
                goto err_kobj_remove;
        }
 
        ret = freq_qos_add_notifier(&policy->constraints, FREQ_QOS_MAX,
                                    &policy->nb_max);
        if (ret) {
-               dev_err(dev, "Failed to register MAX QoS notifier: %d (%*pbl)\n",
-                       ret, cpumask_pr_args(policy->cpus));
+               dev_err(dev, "Failed to register MAX QoS notifier: %d (CPU%u)\n",
+                       ret, cpu);
                goto err_min_qos_notifier;
        }
 
index 55c7ffd..a33df3c 100644 (file)
@@ -243,7 +243,8 @@ void cpufreq_stats_create_table(struct cpufreq_policy *policy)
 
        /* Find valid-unique entries */
        cpufreq_for_each_valid_entry(pos, policy->freq_table)
-               if (freq_table_get_index(stats, pos->frequency) == -1)
+               if (policy->freq_table_sorted != CPUFREQ_TABLE_UNSORTED ||
+                   freq_table_get_index(stats, pos->frequency) == -1)
                        stats->freq_table[i++] = pos->frequency;
 
        stats->state_num = i;
index ebb3a81..7d27544 100644 (file)
@@ -131,7 +131,7 @@ static int __init davinci_cpufreq_probe(struct platform_device *pdev)
        return cpufreq_register_driver(&davinci_driver);
 }
 
-static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
+static void __exit davinci_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&davinci_driver);
 
@@ -139,15 +139,13 @@ static int __exit davinci_cpufreq_remove(struct platform_device *pdev)
 
        if (cpufreq.asyncclk)
                clk_put(cpufreq.asyncclk);
-
-       return 0;
 }
 
 static struct platform_driver davinci_cpufreq_driver = {
        .driver = {
                .name    = "cpufreq-davinci",
        },
-       .remove = __exit_p(davinci_cpufreq_remove),
+       .remove_new = __exit_p(davinci_cpufreq_remove),
 };
 
 int __init davinci_cpufreq_init(void)
index 535867a..577bb9e 100644 (file)
@@ -172,20 +172,18 @@ static int imx_cpufreq_dt_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int imx_cpufreq_dt_remove(struct platform_device *pdev)
+static void imx_cpufreq_dt_remove(struct platform_device *pdev)
 {
        platform_device_unregister(cpufreq_dt_pdev);
        if (!of_machine_is_compatible("fsl,imx7ulp"))
                dev_pm_opp_put_supported_hw(cpufreq_opp_token);
        else
                clk_bulk_put(ARRAY_SIZE(imx7ulp_clks), imx7ulp_clks);
-
-       return 0;
 }
 
 static struct platform_driver imx_cpufreq_dt_driver = {
        .probe = imx_cpufreq_dt_probe,
-       .remove = imx_cpufreq_dt_remove,
+       .remove_new = imx_cpufreq_dt_remove,
        .driver = {
                .name = "imx-cpufreq-dt",
        },
index 9fb1501..494d044 100644 (file)
@@ -519,7 +519,7 @@ put_node:
        return ret;
 }
 
-static int imx6q_cpufreq_remove(struct platform_device *pdev)
+static void imx6q_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&imx6q_cpufreq_driver);
        dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
@@ -530,8 +530,6 @@ static int imx6q_cpufreq_remove(struct platform_device *pdev)
        regulator_put(soc_reg);
 
        clk_bulk_put(num_clks, clks);
-
-       return 0;
 }
 
 static struct platform_driver imx6q_cpufreq_platdrv = {
@@ -539,7 +537,7 @@ static struct platform_driver imx6q_cpufreq_platdrv = {
                .name   = "imx6q-cpufreq",
        },
        .probe          = imx6q_cpufreq_probe,
-       .remove         = imx6q_cpufreq_remove,
+       .remove_new     = imx6q_cpufreq_remove,
 };
 module_platform_driver(imx6q_cpufreq_platdrv);
 
index 8ca2bce..dc50c9f 100644 (file)
@@ -2609,6 +2609,11 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
                        intel_pstate_clear_update_util_hook(policy->cpu);
                intel_pstate_hwp_set(policy->cpu);
        }
+       /*
+        * policy->cur is never updated with the intel_pstate driver, but it
+        * is used as a stale frequency value. So, keep it within limits.
+        */
+       policy->cur = policy->min;
 
        mutex_unlock(&intel_pstate_limits_lock);
 
index 9558810..fd20b98 100644 (file)
@@ -178,20 +178,18 @@ out_node:
        return err;
 }
 
-static int kirkwood_cpufreq_remove(struct platform_device *pdev)
+static void kirkwood_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&kirkwood_cpufreq_driver);
 
        clk_disable_unprepare(priv.powersave_clk);
        clk_disable_unprepare(priv.ddr_clk);
        clk_disable_unprepare(priv.cpu_clk);
-
-       return 0;
 }
 
 static struct platform_driver kirkwood_cpufreq_platform_driver = {
        .probe = kirkwood_cpufreq_probe,
-       .remove = kirkwood_cpufreq_remove,
+       .remove_new = kirkwood_cpufreq_remove,
        .driver = {
                .name = "kirkwood-cpufreq",
        },
index b22f5cc..d46afb3 100644 (file)
@@ -10,8 +10,9 @@
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #define LUT_MAX_ENTRIES                        32U
@@ -315,11 +316,9 @@ static int mtk_cpufreq_hw_driver_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
+static void mtk_cpufreq_hw_driver_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&cpufreq_mtk_hw_driver);
-
-       return 0;
 }
 
 static const struct of_device_id mtk_cpufreq_hw_match[] = {
@@ -330,7 +329,7 @@ MODULE_DEVICE_TABLE(of, mtk_cpufreq_hw_match);
 
 static struct platform_driver mtk_cpufreq_hw_driver = {
        .probe = mtk_cpufreq_hw_driver_probe,
-       .remove = mtk_cpufreq_hw_driver_remove,
+       .remove_new = mtk_cpufreq_hw_driver_remove,
        .driver = {
                .name = "mtk-cpufreq-hw",
                .of_match_table = mtk_cpufreq_hw_match,
index fef68cb..a0a6191 100644 (file)
@@ -313,8 +313,6 @@ out:
        return ret;
 }
 
-#define DYNAMIC_POWER "dynamic-power-coefficient"
-
 static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
                                    unsigned long event, void *data)
 {
index 81649a1..8956908 100644 (file)
@@ -182,11 +182,9 @@ static int omap_cpufreq_probe(struct platform_device *pdev)
        return cpufreq_register_driver(&omap_driver);
 }
 
-static int omap_cpufreq_remove(struct platform_device *pdev)
+static void omap_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&omap_driver);
-
-       return 0;
 }
 
 static struct platform_driver omap_cpufreq_platdrv = {
@@ -194,7 +192,7 @@ static struct platform_driver omap_cpufreq_platdrv = {
                .name   = "omap-cpufreq",
        },
        .probe          = omap_cpufreq_probe,
-       .remove         = omap_cpufreq_remove,
+       .remove_new     = omap_cpufreq_remove,
 };
 module_platform_driver(omap_cpufreq_platdrv);
 
index 73efbcf..84fe37d 100644 (file)
@@ -608,22 +608,20 @@ static int __init pcc_cpufreq_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int pcc_cpufreq_remove(struct platform_device *pdev)
+static void pcc_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&pcc_cpufreq_driver);
 
        pcc_clear_mapping();
 
        free_percpu(pcc_cpu_info);
-
-       return 0;
 }
 
 static struct platform_driver pcc_cpufreq_platdrv = {
        .driver = {
                .name   = "pcc-cpufreq",
        },
-       .remove         = pcc_cpufreq_remove,
+       .remove_new     = pcc_cpufreq_remove,
 };
 
 static int __init pcc_cpufreq_init(void)
index d289036..b10f7a1 100644 (file)
@@ -1101,7 +1101,8 @@ static int powernowk8_cpu_exit(struct cpufreq_policy *pol)
 
        kfree(data->powernow_table);
        kfree(data);
-       for_each_cpu(cpu, pol->cpus)
+       /* pol->cpus will be empty here, use related_cpus instead. */
+       for_each_cpu(cpu, pol->related_cpus)
                per_cpu(powernow_data, cpu) = NULL;
 
        return 0;
index e3313ce..88afc49 100644 (file)
@@ -9,7 +9,7 @@
 
 #include <linux/cpufreq.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 
 #include <asm/machdep.h>
 #include <asm/cell-regs.h>
index 4fba363..6f0c325 100644 (file)
@@ -11,7 +11,6 @@
 #include <linux/types.h>
 #include <linux/timer.h>
 #include <linux/init.h>
-#include <linux/of_platform.h>
 #include <linux/pm_qos.h>
 #include <linux/slab.h>
 
index f283037..70b0f21 100644 (file)
@@ -28,7 +28,7 @@
 
 #define GT_IRQ_STATUS                  BIT(2)
 
-#define MAX_FREQ_DOMAINS               3
+#define MAX_FREQ_DOMAINS               4
 
 struct qcom_cpufreq_soc_data {
        u32 reg_enable;
@@ -730,16 +730,14 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
+static void qcom_cpufreq_hw_driver_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&cpufreq_qcom_hw_driver);
-
-       return 0;
 }
 
 static struct platform_driver qcom_cpufreq_hw_driver = {
        .probe = qcom_cpufreq_hw_driver_probe,
-       .remove = qcom_cpufreq_hw_driver_remove,
+       .remove_new = qcom_cpufreq_hw_driver_remove,
        .driver = {
                .name = "qcom-cpufreq-hw",
                .of_match_table = qcom_cpufreq_hw_match,
index a88b6fe..84d7033 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/pm_opp.h>
@@ -334,7 +333,7 @@ free_drv:
        return ret;
 }
 
-static int qcom_cpufreq_remove(struct platform_device *pdev)
+static void qcom_cpufreq_remove(struct platform_device *pdev)
 {
        struct qcom_cpufreq_drv *drv = platform_get_drvdata(pdev);
        unsigned int cpu;
@@ -346,13 +345,11 @@ static int qcom_cpufreq_remove(struct platform_device *pdev)
 
        kfree(drv->opp_tokens);
        kfree(drv);
-
-       return 0;
 }
 
 static struct platform_driver qcom_cpufreq_driver = {
        .probe = qcom_cpufreq_probe,
-       .remove = qcom_cpufreq_remove,
+       .remove_new = qcom_cpufreq_remove,
        .driver = {
                .name = "qcom-cpufreq-nvmem",
        },
index 573b417..0aecaec 100644 (file)
@@ -288,11 +288,9 @@ static int qoriq_cpufreq_probe(struct platform_device *pdev)
        return 0;
 }
 
-static int qoriq_cpufreq_remove(struct platform_device *pdev)
+static void qoriq_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&qoriq_cpufreq_driver);
-
-       return 0;
 }
 
 static struct platform_driver qoriq_cpufreq_platform_driver = {
@@ -300,7 +298,7 @@ static struct platform_driver qoriq_cpufreq_platform_driver = {
                .name = "qoriq-cpufreq",
        },
        .probe = qoriq_cpufreq_probe,
-       .remove = qoriq_cpufreq_remove,
+       .remove_new = qoriq_cpufreq_remove,
 };
 module_platform_driver(qoriq_cpufreq_platform_driver);
 
index 2bc7d97..e0705cc 100644 (file)
@@ -65,7 +65,7 @@ remove_opp:
        return ret;
 }
 
-static int raspberrypi_cpufreq_remove(struct platform_device *pdev)
+static void raspberrypi_cpufreq_remove(struct platform_device *pdev)
 {
        struct device *cpu_dev;
 
@@ -74,8 +74,6 @@ static int raspberrypi_cpufreq_remove(struct platform_device *pdev)
                dev_pm_opp_remove_all_dynamic(cpu_dev);
 
        platform_device_unregister(cpufreq_dt);
-
-       return 0;
 }
 
 /*
@@ -87,7 +85,7 @@ static struct platform_driver raspberrypi_cpufreq_driver = {
                .name = "raspberrypi-cpufreq",
        },
        .probe          = raspberrypi_cpufreq_probe,
-       .remove         = raspberrypi_cpufreq_remove,
+       .remove_new     = raspberrypi_cpufreq_remove,
 };
 module_platform_driver(raspberrypi_cpufreq_driver);
 
index fd2c168..d33be56 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/cpumask.h>
 #include <linux/export.h>
 #include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/scpi_protocol.h>
 #include <linux/slab.h>
@@ -208,11 +208,10 @@ static int scpi_cpufreq_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int scpi_cpufreq_remove(struct platform_device *pdev)
+static void scpi_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&scpi_cpufreq_driver);
        scpi_ops = NULL;
-       return 0;
 }
 
 static struct platform_driver scpi_cpufreq_platdrv = {
@@ -220,7 +219,7 @@ static struct platform_driver scpi_cpufreq_platdrv = {
                .name   = "scpi-cpufreq",
        },
        .probe          = scpi_cpufreq_probe,
-       .remove         = scpi_cpufreq_remove,
+       .remove_new     = scpi_cpufreq_remove,
 };
 module_platform_driver(scpi_cpufreq_platdrv);
 
index d3510cf..2783d3d 100644 (file)
@@ -269,7 +269,7 @@ static int us2e_freq_target(struct cpufreq_policy *policy, unsigned int index)
        return smp_call_function_single(cpu, __us2e_freq_target, &index, 1);
 }
 
-static int __init us2e_freq_cpu_init(struct cpufreq_policy *policy)
+static int us2e_freq_cpu_init(struct cpufreq_policy *policy)
 {
        unsigned int cpu = policy->cpu;
        unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000;
index 91d1ed5..6c36576 100644 (file)
@@ -117,7 +117,7 @@ static int us3_freq_target(struct cpufreq_policy *policy, unsigned int index)
        return smp_call_function_single(cpu, update_safari_cfg, &new_bits, 1);
 }
 
-static int __init us3_freq_cpu_init(struct cpufreq_policy *policy)
+static int us3_freq_cpu_init(struct cpufreq_policy *policy)
 {
        unsigned int cpu = policy->cpu;
        unsigned long clock_tick = sparc64_get_clock_tick(cpu) / 1000;
index 1a63aee..9c542e7 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regmap.h>
 
index 4321d7b..32a9c88 100644 (file)
@@ -137,7 +137,7 @@ free_opp:
        return ret;
 }
 
-static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev)
+static void sun50i_cpufreq_nvmem_remove(struct platform_device *pdev)
 {
        int *opp_tokens = platform_get_drvdata(pdev);
        unsigned int cpu;
@@ -148,13 +148,11 @@ static int sun50i_cpufreq_nvmem_remove(struct platform_device *pdev)
                dev_pm_opp_put_prop_name(opp_tokens[cpu]);
 
        kfree(opp_tokens);
-
-       return 0;
 }
 
 static struct platform_driver sun50i_cpufreq_driver = {
        .probe = sun50i_cpufreq_nvmem_probe,
-       .remove = sun50i_cpufreq_nvmem_remove,
+       .remove_new = sun50i_cpufreq_nvmem_remove,
        .driver = {
                .name = "sun50i-cpufreq-nvmem",
        },
index f98f53b..7b8fcfa 100644 (file)
@@ -259,11 +259,9 @@ put_bpmp:
        return err;
 }
 
-static int tegra186_cpufreq_remove(struct platform_device *pdev)
+static void tegra186_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&tegra186_cpufreq_driver);
-
-       return 0;
 }
 
 static const struct of_device_id tegra186_cpufreq_of_match[] = {
@@ -278,7 +276,7 @@ static struct platform_driver tegra186_cpufreq_platform_driver = {
                .of_match_table = tegra186_cpufreq_of_match,
        },
        .probe = tegra186_cpufreq_probe,
-       .remove = tegra186_cpufreq_remove,
+       .remove_new = tegra186_cpufreq_remove,
 };
 module_platform_driver(tegra186_cpufreq_platform_driver);
 
index 36dad5e..88ef5e5 100644 (file)
@@ -508,6 +508,32 @@ static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
        return 0;
 }
 
+static int tegra194_cpufreq_online(struct cpufreq_policy *policy)
+{
+       /* We did light-weight tear down earlier, nothing to do here */
+       return 0;
+}
+
+static int tegra194_cpufreq_offline(struct cpufreq_policy *policy)
+{
+       /*
+        * Preserve policy->driver_data and don't free resources on light-weight
+        * tear down.
+        */
+
+       return 0;
+}
+
+static int tegra194_cpufreq_exit(struct cpufreq_policy *policy)
+{
+       struct device *cpu_dev = get_cpu_device(policy->cpu);
+
+       dev_pm_opp_remove_all_dynamic(cpu_dev);
+       dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+
+       return 0;
+}
+
 static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
                                       unsigned int index)
 {
@@ -535,6 +561,9 @@ static struct cpufreq_driver tegra194_cpufreq_driver = {
        .target_index = tegra194_cpufreq_set_target,
        .get = tegra194_get_speed,
        .init = tegra194_cpufreq_init,
+       .exit = tegra194_cpufreq_exit,
+       .online = tegra194_cpufreq_online,
+       .offline = tegra194_cpufreq_offline,
        .attr = cpufreq_generic_attr,
 };
 
@@ -708,12 +737,10 @@ put_bpmp:
        return err;
 }
 
-static int tegra194_cpufreq_remove(struct platform_device *pdev)
+static void tegra194_cpufreq_remove(struct platform_device *pdev)
 {
        cpufreq_unregister_driver(&tegra194_cpufreq_driver);
        tegra194_cpufreq_free_resources();
-
-       return 0;
 }
 
 static const struct of_device_id tegra194_cpufreq_of_match[] = {
@@ -730,7 +757,7 @@ static struct platform_driver tegra194_ccplex_driver = {
                .of_match_table = tegra194_cpufreq_of_match,
        },
        .probe = tegra194_cpufreq_probe,
-       .remove = tegra194_cpufreq_remove,
+       .remove_new = tegra194_cpufreq_remove,
 };
 module_platform_driver(tegra194_ccplex_driver);
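
The new online/offline callbacks split CPU hotplug handling from full policy
teardown: the cpufreq core invokes .online/.offline across hotplug cycles and
calls .exit only when the policy is finally destroyed, so per-policy state
such as policy->driver_data survives an offline/online round trip. A sketch
of that split (illustrative, assuming the usual cpufreq driver boilerplate):

	static int ex_online(struct cpufreq_policy *policy)
	{
		return 0;	/* light-weight tear down earlier; nothing to redo */
	}

	static int ex_offline(struct cpufreq_policy *policy)
	{
		return 0;	/* keep policy->driver_data for the next online */
	}

	static int ex_exit(struct cpufreq_policy *policy)
	{
		kfree(policy->driver_data);	/* full teardown only here */
		return 0;
	}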
 
index d5cd2fd..3c37d78 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index d295f40..9ac4ea5 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/slab.h>
@@ -552,7 +551,7 @@ static int ve_spc_cpufreq_probe(struct platform_device *pdev)
        return ret;
 }
 
-static int ve_spc_cpufreq_remove(struct platform_device *pdev)
+static void ve_spc_cpufreq_remove(struct platform_device *pdev)
 {
        bL_switcher_get_enabled();
        __bLs_unregister_notifier();
@@ -560,7 +559,6 @@ static int ve_spc_cpufreq_remove(struct platform_device *pdev)
        bL_switcher_put_enabled();
        pr_info("%s: Un-registered platform driver: %s\n", __func__,
                ve_spc_cpufreq_driver.name);
-       return 0;
 }
 
 static struct platform_driver ve_spc_cpufreq_platdrv = {
@@ -568,7 +566,7 @@ static struct platform_driver ve_spc_cpufreq_platdrv = {
                .name   = "vexpress-spc-cpufreq",
        },
        .probe          = ve_spc_cpufreq_probe,
-       .remove         = ve_spc_cpufreq_remove,
+       .remove_new     = ve_spc_cpufreq_remove,
 };
 module_platform_driver(ve_spc_cpufreq_platdrv);
 
index c2d6d9c..b88af12 100644 (file)
@@ -120,20 +120,6 @@ static void psci_pd_remove(void)
        }
 }
 
-static bool psci_pd_try_set_osi_mode(void)
-{
-       int ret;
-
-       if (!psci_has_osi_support())
-               return false;
-
-       ret = psci_set_osi_mode(true);
-       if (ret)
-               return false;
-
-       return true;
-}
-
 static void psci_cpuidle_domain_sync_state(struct device *dev)
 {
        /*
@@ -152,15 +138,12 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct device_node *node;
-       bool use_osi;
+       bool use_osi = psci_has_osi_support();
        int ret = 0, pd_count = 0;
 
        if (!np)
                return -ENODEV;
 
-       /* If OSI mode is supported, let's try to enable it. */
-       use_osi = psci_pd_try_set_osi_mode();
-
        /*
         * Parse child nodes for the "#power-domain-cells" property and
         * initialize a genpd/genpd-of-provider pair when it's found.
@@ -170,33 +153,37 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
                        continue;
 
                ret = psci_pd_init(node, use_osi);
-               if (ret)
-                       goto put_node;
+               if (ret) {
+                       of_node_put(node);
+                       goto exit;
+               }
 
                pd_count++;
        }
 
        /* Bail out if not using the hierarchical CPU topology. */
        if (!pd_count)
-               goto no_pd;
+               return 0;
 
        /* Link genpd masters/subdomains to model the CPU topology. */
        ret = dt_idle_pd_init_topology(np);
        if (ret)
                goto remove_pd;
 
+       /* Let's try to enable OSI. */
+       ret = psci_set_osi_mode(use_osi);
+       if (ret)
+               goto remove_pd;
+
        pr_info("Initialized CPU PM domain topology using %s mode\n",
                use_osi ? "OSI" : "PC");
        return 0;
 
-put_node:
-       of_node_put(node);
 remove_pd:
+       dt_idle_pd_remove_topology(np);
        psci_pd_remove();
+exit:
        pr_err("failed to create CPU PM domains ret=%d\n", ret);
-no_pd:
-       if (use_osi)
-               psci_set_osi_mode(false);
        return ret;
 }
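
The reordering above is what makes the error handling tractable: OSI is now
enabled only after the domain topology has been built, so the failure paths
unwind strictly in reverse (topology links, then the domains themselves) and
no longer need a compensating psci_set_osi_mode(false) call. Roughly:

	ret = psci_set_osi_mode(use_osi);	/* last setup step */
	if (ret)
		goto remove_pd;		/* OSI was never armed; just unwind */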
 
index b371655..1af63c1 100644 (file)
@@ -152,6 +152,30 @@ int dt_idle_pd_init_topology(struct device_node *np)
        return 0;
 }
 
+int dt_idle_pd_remove_topology(struct device_node *np)
+{
+       struct device_node *node;
+       struct of_phandle_args child, parent;
+       int ret;
+
+       for_each_child_of_node(np, node) {
+               if (of_parse_phandle_with_args(node, "power-domains",
+                                       "#power-domain-cells", 0, &parent))
+                       continue;
+
+               child.np = node;
+               child.args_count = 0;
+               ret = of_genpd_remove_subdomain(&parent, &child);
+               of_node_put(parent.np);
+               if (ret) {
+                       of_node_put(node);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 struct device *dt_idle_attach_cpu(int cpu, const char *name)
 {
        struct device *dev;
index a95483d..3be1f70 100644 (file)
@@ -14,6 +14,8 @@ struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
 
 int dt_idle_pd_init_topology(struct device_node *np);
 
+int dt_idle_pd_remove_topology(struct device_node *np);
+
 struct device *dt_idle_attach_cpu(int cpu, const char *name);
 
 void dt_idle_detach_cpu(struct device *dev);
@@ -36,6 +38,11 @@ static inline int dt_idle_pd_init_topology(struct device_node *np)
        return 0;
 }
 
+static inline int dt_idle_pd_remove_topology(struct device_node *np)
+{
+       return 0;
+}
+
 static inline struct device *dt_idle_attach_cpu(int cpu, const char *name)
 {
        return NULL;
diff --git a/drivers/cpuidle/governors/gov.h b/drivers/cpuidle/governors/gov.h
new file mode 100644 (file)
index 0000000..99e067d
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Common definitions for cpuidle governors. */
+
+#ifndef __CPUIDLE_GOVERNOR_H
+#define __CPUIDLE_GOVERNOR_H
+
+/*
+ * Idle state target residency threshold used for deciding whether or not to
+ * check the time till the closest expected timer event.
+ */
+#define RESIDENCY_THRESHOLD_NS (15 * NSEC_PER_USEC)
+
+#endif /* __CPUIDLE_GOVERNOR_H */
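
The threshold is shared by the menu and teo changes below: querying the time
till the next timer event is comparatively expensive, so both governors now
skip it when the predicted idle interval is already known to be very short.
The resulting pattern, sketched from the menu hunk that follows:

	/* Only pay for the timer query when the answer can matter. */
	if (predicted_ns > RESIDENCY_THRESHOLD_NS)
		delta = tick_nohz_get_sleep_length(&delta_tick);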
index c492268..b96e3da 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/sched/stat.h>
 #include <linux/math64.h>
 
+#include "gov.h"
+
 #define BUCKETS 12
 #define INTERVAL_SHIFT 3
 #define INTERVALS (1UL << INTERVAL_SHIFT)
@@ -166,8 +168,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static unsigned int get_typical_interval(struct menu_device *data,
-                                        unsigned int predicted_us)
+static unsigned int get_typical_interval(struct menu_device *data)
 {
        int i, divisor;
        unsigned int min, max, thresh, avg;
@@ -195,11 +196,7 @@ again:
                }
        }
 
-       /*
-        * If the result of the computation is going to be discarded anyway,
-        * avoid the computation altogether.
-        */
-       if (min >= predicted_us)
+       if (!max)
                return UINT_MAX;
 
        if (divisor == INTERVALS)
@@ -267,7 +264,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
        struct menu_device *data = this_cpu_ptr(&menu_devices);
        s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
-       unsigned int predicted_us;
        u64 predicted_ns;
        u64 interactivity_req;
        unsigned int nr_iowaiters;
@@ -279,16 +275,41 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                data->needs_update = 0;
        }
 
-       /* determine the expected residency time, round up */
-       delta = tick_nohz_get_sleep_length(&delta_tick);
-       if (unlikely(delta < 0)) {
-               delta = 0;
-               delta_tick = 0;
-       }
-       data->next_timer_ns = delta;
-
        nr_iowaiters = nr_iowait_cpu(dev->cpu);
-       data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+       /* Find the shortest expected idle interval. */
+       predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+       if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
+               unsigned int timer_us;
+
+               /* Determine the time till the closest timer. */
+               delta = tick_nohz_get_sleep_length(&delta_tick);
+               if (unlikely(delta < 0)) {
+                       delta = 0;
+                       delta_tick = 0;
+               }
+
+               data->next_timer_ns = delta;
+               data->bucket = which_bucket(data->next_timer_ns, nr_iowaiters);
+
+               /* Round up the result for half microseconds. */
+               timer_us = div_u64((RESOLUTION * DECAY * NSEC_PER_USEC) / 2 +
+                                       data->next_timer_ns *
+                                               data->correction_factor[data->bucket],
+                                  RESOLUTION * DECAY * NSEC_PER_USEC);
+               /* Use the lowest expected idle interval to pick the idle state. */
+               predicted_ns = min((u64)timer_us * NSEC_PER_USEC, predicted_ns);
+       } else {
+               /*
+                * Because the next timer event is not going to be determined
+                * in this case, assume that without the tick the closest timer
+                * will be in the distant future and that the closest tick will occur
+                * after 1/2 of the tick period.
+                */
+               data->next_timer_ns = KTIME_MAX;
+               delta_tick = TICK_NSEC / 2;
+               data->bucket = which_bucket(KTIME_MAX, nr_iowaiters);
+       }
 
        if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
            ((data->next_timer_ns < drv->states[1].target_residency_ns ||
@@ -303,16 +324,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                return 0;
        }
 
-       /* Round up the result for half microseconds. */
-       predicted_us = div_u64(data->next_timer_ns *
-                              data->correction_factor[data->bucket] +
-                              (RESOLUTION * DECAY * NSEC_PER_USEC) / 2,
-                              RESOLUTION * DECAY * NSEC_PER_USEC);
-       /* Use the lowest expected idle interval to pick the idle state. */
-       predicted_ns = (u64)min(predicted_us,
-                               get_typical_interval(data, predicted_us)) *
-                               NSEC_PER_USEC;
-
        if (tick_nohz_tick_stopped()) {
                /*
                 * If the tick is already stopped, the cost of possible short
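
For reference, the relocated rounding computation above is the usual
add-half-the-divisor idiom. With correction_factor[] scaled by
RESOLUTION * DECAY (a stored value of RESOLUTION * DECAY meaning a factor of
1.0), it reduces to timer_us = round(next_timer_ns * factor / NSEC_PER_USEC);
for example, next_timer_ns = 3,000,000 (3 ms) with a factor of 0.5 yields
timer_us = 1500.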
index 987fc5f..7244f71 100644 (file)
 #include <linux/sched/topology.h>
 #include <linux/tick.h>
 
+#include "gov.h"
+
 /*
  * The number of bits to shift the CPU's capacity by in order to determine
  * the utilized threshold.
  */
 #define UTIL_THRESHOLD_SHIFT 6
 
-
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
  * is used for decreasing metrics on a regular basis.
@@ -186,8 +187,8 @@ struct teo_bin {
  * @total: Grand total of the "intercepts" and "hits" metrics for all bins.
  * @next_recent_idx: Index of the next @recent_idx entry to update.
  * @recent_idx: Indices of bins corresponding to recent "intercepts".
+ * @tick_hits: Number of "hits" after TICK_NSEC.
  * @util_threshold: Threshold above which the CPU is considered utilized
- * @utilized: Whether the last sleep on the CPU happened while utilized
  */
 struct teo_cpu {
        s64 time_span_ns;
@@ -196,8 +197,8 @@ struct teo_cpu {
        unsigned int total;
        int next_recent_idx;
        int recent_idx[NR_RECENT];
+       unsigned int tick_hits;
        unsigned long util_threshold;
-       bool utilized;
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -228,6 +229,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
        struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
        int i, idx_timer = 0, idx_duration = 0;
+       s64 target_residency_ns;
        u64 measured_ns;
 
        if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -268,7 +270,6 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
         * fall into.
         */
        for (i = 0; i < drv->state_count; i++) {
-               s64 target_residency_ns = drv->states[i].target_residency_ns;
                struct teo_bin *bin = &cpu_data->state_bins[i];
 
                bin->hits -= bin->hits >> DECAY_SHIFT;
@@ -276,6 +277,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
                cpu_data->total += bin->hits + bin->intercepts;
 
+               target_residency_ns = drv->states[i].target_residency_ns;
+
                if (target_residency_ns <= cpu_data->sleep_length_ns) {
                        idx_timer = i;
                        if (target_residency_ns <= measured_ns)
@@ -291,6 +294,26 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                cpu_data->state_bins[cpu_data->recent_idx[i]].recent--;
 
        /*
+        * If the deepest state's target residency is below the tick length,
+        * make a record of it to help teo_select() decide whether or not
+        * to stop the tick.  This effectively adds an extra hits-only bin
+        * beyond the last state-related one.
+        */
+       if (target_residency_ns < TICK_NSEC) {
+               cpu_data->tick_hits -= cpu_data->tick_hits >> DECAY_SHIFT;
+
+               cpu_data->total += cpu_data->tick_hits;
+
+               if (TICK_NSEC <= cpu_data->sleep_length_ns) {
+                       idx_timer = drv->state_count;
+                       if (TICK_NSEC <= measured_ns) {
+                               cpu_data->tick_hits += PULSE;
+                               goto end;
+                       }
+               }
+       }
+
+       /*
         * If the measured idle duration falls into the same bin as the sleep
         * length, this is a "hit", so update the "hits" metric for that bin.
         * Otherwise, update the "intercepts" metric for the bin fallen into by
@@ -305,18 +328,14 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                cpu_data->recent_idx[i] = idx_duration;
        }
 
+end:
        cpu_data->total += PULSE;
 }
 
-static bool teo_time_ok(u64 interval_ns)
+static bool teo_state_ok(int i, struct cpuidle_driver *drv)
 {
-       return !tick_nohz_tick_stopped() || interval_ns >= TICK_NSEC;
-}
-
-static s64 teo_middle_of_bin(int idx, struct cpuidle_driver *drv)
-{
-       return (drv->states[idx].target_residency_ns +
-               drv->states[idx+1].target_residency_ns) / 2;
+       return !tick_nohz_tick_stopped() ||
+               drv->states[i].target_residency_ns >= TICK_NSEC;
 }
 
 /**
@@ -356,6 +375,8 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 {
        struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
        s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
+       ktime_t delta_tick = TICK_NSEC / 2;
+       unsigned int tick_intercept_sum = 0;
        unsigned int idx_intercept_sum = 0;
        unsigned int intercept_sum = 0;
        unsigned int idx_recent_sum = 0;
@@ -365,7 +386,7 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
        int constraint_idx = 0;
        int idx0 = 0, idx = -1;
        bool alt_intercepts, alt_recent;
-       ktime_t delta_tick;
+       bool cpu_utilized;
        s64 duration_ns;
        int i;
 
@@ -375,44 +396,48 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
        }
 
        cpu_data->time_span_ns = local_clock();
-
-       duration_ns = tick_nohz_get_sleep_length(&delta_tick);
-       cpu_data->sleep_length_ns = duration_ns;
+       /*
+        * Set the expected sleep length to infinity in case of an early
+        * return.
+        */
+       cpu_data->sleep_length_ns = KTIME_MAX;
 
        /* Check if there is any choice in the first place. */
        if (drv->state_count < 2) {
                idx = 0;
-               goto end;
+               goto out_tick;
        }
-       if (!dev->states_usage[0].disable) {
+
+       if (!dev->states_usage[0].disable)
                idx = 0;
-               if (drv->states[1].target_residency_ns > duration_ns)
-                       goto end;
-       }
 
-       cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
+       cpu_utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
        /*
         * If the CPU is being utilized over the threshold and there are only 2
         * states to choose from, the metrics need not be considered, so choose
         * the shallowest non-polling state and exit.
         */
-       if (drv->state_count < 3 && cpu_data->utilized) {
-               for (i = 0; i < drv->state_count; ++i) {
-                       if (!dev->states_usage[i].disable &&
-                           !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
-                               idx = i;
-                               goto end;
-                       }
+       if (drv->state_count < 3 && cpu_utilized) {
+               /*
+                * If state 0 is enabled and it is not a polling one, select it
+                * right away unless the scheduler tick has been stopped, in
+                * which case care needs to be taken to leave the CPU in a deep
+                * enough state in case it is not woken up any time soon after
+                * all.  If state 1 is disabled, though, state 0 must be used
+                * anyway.
+                */
+               if ((!idx && !(drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+                   teo_state_ok(0, drv)) || dev->states_usage[1].disable) {
+                       idx = 0;
+                       goto out_tick;
                }
+               /* Assume that state 1 is not a polling one and use it. */
+               idx = 1;
+               duration_ns = drv->states[1].target_residency_ns;
+               goto end;
        }
 
-       /*
-        * Find the deepest idle state whose target residency does not exceed
-        * the current sleep length and the deepest idle state not deeper than
-        * the former whose exit latency does not exceed the current latency
-        * constraint.  Compute the sums of metrics for early wakeup pattern
-        * detection.
-        */
+       /* Compute the sums of metrics for early wakeup pattern detection. */
        for (i = 1; i < drv->state_count; i++) {
                struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
                struct cpuidle_state *s = &drv->states[i];
@@ -428,19 +453,15 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                if (dev->states_usage[i].disable)
                        continue;
 
-               if (idx < 0) {
-                       idx = i; /* first enabled state */
-                       idx0 = i;
-               }
-
-               if (s->target_residency_ns > duration_ns)
-                       break;
+               if (idx < 0)
+                       idx0 = i; /* first enabled state */
 
                idx = i;
 
                if (s->exit_latency_ns <= latency_req)
                        constraint_idx = i;
 
+               /* Save the sums for the current state. */
                idx_intercept_sum = intercept_sum;
                idx_hit_sum = hit_sum;
                idx_recent_sum = recent_sum;
@@ -449,11 +470,21 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
        /* Avoid unnecessary overhead. */
        if (idx < 0) {
                idx = 0; /* No states enabled, must use 0. */
-               goto end;
-       } else if (idx == idx0) {
+               goto out_tick;
+       }
+
+       if (idx == idx0) {
+               /*
+                * Only one idle state is enabled, so use it, but do not
+                * allow the tick to be stopped if it is shallow enough.
+                */
+               duration_ns = drv->states[idx].target_residency_ns;
                goto end;
        }
 
+       tick_intercept_sum = intercept_sum +
+                       cpu_data->state_bins[drv->state_count-1].intercepts;
+
        /*
         * If the sum of the intercepts metric for all of the idle states
         * shallower than the current candidate one (idx) is greater than the
@@ -461,13 +492,11 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
         * all of the deeper states, or the sum of the numbers of recent
         * intercepts over all of the states shallower than the candidate one
         * is greater than a half of the number of recent events taken into
-        * account, the CPU is likely to wake up early, so find an alternative
-        * idle state to select.
+        * account, a shallower idle state is likely to be a better choice.
         */
        alt_intercepts = 2 * idx_intercept_sum > cpu_data->total - idx_hit_sum;
        alt_recent = idx_recent_sum > NR_RECENT / 2;
        if (alt_recent || alt_intercepts) {
-               s64 first_suitable_span_ns = duration_ns;
                int first_suitable_idx = idx;
 
                /*
@@ -476,44 +505,39 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                 * cases (both with respect to intercepts overall and with
                 * respect to the recent intercepts only) in the past.
                 *
-                * Take the possible latency constraint and duration limitation
-                * present if the tick has been stopped already into account.
+                * Take into account the possible duration limitation that
+                * applies if the tick has been stopped already.
                 */
                intercept_sum = 0;
                recent_sum = 0;
 
                for (i = idx - 1; i >= 0; i--) {
                        struct teo_bin *bin = &cpu_data->state_bins[i];
-                       s64 span_ns;
 
                        intercept_sum += bin->intercepts;
                        recent_sum += bin->recent;
 
-                       span_ns = teo_middle_of_bin(i, drv);
-
                        if ((!alt_recent || 2 * recent_sum > idx_recent_sum) &&
                            (!alt_intercepts ||
                             2 * intercept_sum > idx_intercept_sum)) {
-                               if (teo_time_ok(span_ns) &&
-                                   !dev->states_usage[i].disable) {
+                               /*
+                                * Use the current state unless it is too
+                                * shallow or disabled, in which case take the
+                                * first enabled state that is deep enough.
+                                */
+                               if (teo_state_ok(i, drv) &&
+                                   !dev->states_usage[i].disable)
                                        idx = i;
-                                       duration_ns = span_ns;
-                               } else {
-                                       /*
-                                        * The current state is too shallow or
-                                        * disabled, so take the first enabled
-                                        * deeper state with suitable time span.
-                                        */
+                               else
                                        idx = first_suitable_idx;
-                                       duration_ns = first_suitable_span_ns;
-                               }
+
                                break;
                        }
 
                        if (dev->states_usage[i].disable)
                                continue;
 
-                       if (!teo_time_ok(span_ns)) {
+                       if (!teo_state_ok(i, drv)) {
                                /*
                                 * The current state is too shallow, but if an
                                 * alternative candidate state has been found,
@@ -525,7 +549,6 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
                                break;
                        }
 
-                       first_suitable_span_ns = span_ns;
                        first_suitable_idx = i;
                }
        }
@@ -539,31 +562,75 @@ static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 
        /*
         * If the CPU is being utilized over the threshold, choose a shallower
-        * non-polling state to improve latency
+        * non-polling state to improve latency, unless the scheduler tick has
+        * been stopped already and the shallower state's target residency is
+        * not sufficiently large.
         */
-       if (cpu_data->utilized)
-               idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
+       if (cpu_utilized) {
+               i = teo_find_shallower_state(drv, dev, idx, KTIME_MAX, true);
+               if (teo_state_ok(i, drv))
+                       idx = i;
+       }
 
-end:
        /*
-        * Don't stop the tick if the selected state is a polling one or if the
-        * expected idle duration is shorter than the tick period length.
+        * Skip the timers check if state 0 is the current candidate one,
+        * because an immediate non-timer wakeup is expected in that case.
         */
-       if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-           duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
-               *stop_tick = false;
+       if (!idx)
+               goto out_tick;
 
-               /*
-                * The tick is not going to be stopped, so if the target
-                * residency of the state to be returned is not within the time
-                * till the closest timer including the tick, try to correct
-                * that.
-                */
-               if (idx > idx0 &&
-                   drv->states[idx].target_residency_ns > delta_tick)
-                       idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+       /*
+        * If state 0 is a polling one, check if the target residency of
+        * the current candidate state is low enough and skip the timers
+        * check in that case too.
+        */
+       if ((drv->states[0].flags & CPUIDLE_FLAG_POLLING) &&
+           drv->states[idx].target_residency_ns < RESIDENCY_THRESHOLD_NS)
+               goto out_tick;
+
+       duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+       cpu_data->sleep_length_ns = duration_ns;
+
+       /*
+        * If the closest expected timer is before the target residency of the
+        * candidate state, a shallower one needs to be found.
+        */
+       if (drv->states[idx].target_residency_ns > duration_ns) {
+               i = teo_find_shallower_state(drv, dev, idx, duration_ns, false);
+               if (teo_state_ok(i, drv))
+                       idx = i;
        }
 
+       /*
+        * If the selected state's target residency is below the tick length
+        * and intercepts occurring before the tick length are the majority of
+        * total wakeup events, do not stop the tick.
+        */
+       if (drv->states[idx].target_residency_ns < TICK_NSEC &&
+           tick_intercept_sum > cpu_data->total / 2 + cpu_data->total / 8)
+               duration_ns = TICK_NSEC / 2;
+
+end:
+       /*
+        * Allow the tick to be stopped unless the selected state is a polling
+        * one or the expected idle duration is shorter than the tick period
+        * length.
+        */
+       if ((!(drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+           duration_ns >= TICK_NSEC) || tick_nohz_tick_stopped())
+               return idx;
+
+       /*
+        * The tick is not going to be stopped, so if the target residency of
+        * the state to be returned is not within the time till the closest
+        * timer including the tick, try to correct that.
+        */
+       if (idx > idx0 &&
+           drv->states[idx].target_residency_ns > delta_tick)
+               idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
+
+out_tick:
+       *stop_tick = false;
        return idx;
 }
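
The tick-retention test near the end of teo_select() uses
total / 2 + total / 8, i.e. 5/8 of all recorded events: with
cpu_data->total = 80, for instance, the tick is kept running
(duration_ns = TICK_NSEC / 2) once tick_intercept_sum exceeds 50,
or 62.5% of the total.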
 
index 44e44b8..c761952 100644 (file)
@@ -70,10 +70,9 @@ config ZCRYPT
        select HW_RANDOM
        help
          Select this option if you want to enable support for
-         s390 cryptographic adapters like:
-         + Crypto Express 2 up to 7 Coprocessor (CEXxC)
-         + Crypto Express 2 up to 7 Accelerator (CEXxA)
-         + Crypto Express 4 up to 7 EP11 Coprocessor (CEXxP)
+         s390 cryptographic adapters like Crypto Express 4 up
+         to 8 in Coprocessor (CEXxC), EP11 Coprocessor (CEXxP)
+         or Accelerator (CEXxA) mode.
 
 config ZCRYPT_DEBUG
        bool "Enable debug features for s390 cryptographic adapters"
index ff9ddbb..68e7377 100644 (file)
@@ -382,8 +382,8 @@ static void kick_trng(struct device *dev, int ent_delay)
                val = ent_delay;
                /* min. freq. count, equal to 1/4 of the entropy sample length */
                wr_reg32(&r4tst->rtfrqmin, val >> 2);
-               /* max. freq. count, equal to 16 times the entropy sample length */
-               wr_reg32(&r4tst->rtfrqmax, val << 4);
+               /* disable maximum frequency count */
+               wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
        }
 
        wr_reg32(&r4tst->rtsdctl, (val << RTSDCTL_ENT_DLY_SHIFT) |
index fcbf829..8ea1d34 100644 (file)
@@ -2,6 +2,8 @@
 menuconfig CXL_BUS
        tristate "CXL (Compute Express Link) Devices Support"
        depends on PCI
+       select FW_LOADER
+       select FW_UPLOAD
        select PCI_DOE
        help
          CXL is a bus that is electrically compatible with PCI Express, but
@@ -82,7 +84,6 @@ config CXL_PMEM
 config CXL_MEM
        tristate "CXL: Memory Expansion"
        depends on CXL_PCI
-       select FW_UPLOAD
        default CXL_BUS
        help
          The CXL.mem protocol allows a device to act as a provider of "System
index 658e6b8..d1c5598 100644 (file)
@@ -296,9 +296,8 @@ err_xormap:
        else
                rc = cxl_decoder_autoremove(dev, cxld);
        if (rc) {
-               dev_err(dev, "Failed to add decode range [%#llx - %#llx]\n",
-                       cxld->hpa_range.start, cxld->hpa_range.end);
-               return 0;
+               dev_err(dev, "Failed to add decode range: %pr\n", res);
+               return rc;
        }
        dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
                dev_name(&cxld->dev),
index d6d067f..ca60bb8 100644 (file)
@@ -121,6 +121,45 @@ static bool cxl_is_security_command(u16 opcode)
        return false;
 }
 
+static void cxl_set_security_cmd_enabled(struct cxl_security_state *security,
+                                        u16 opcode)
+{
+       switch (opcode) {
+       case CXL_MBOX_OP_SANITIZE:
+               set_bit(CXL_SEC_ENABLED_SANITIZE, security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_SECURE_ERASE:
+               set_bit(CXL_SEC_ENABLED_SECURE_ERASE,
+                       security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_GET_SECURITY_STATE:
+               set_bit(CXL_SEC_ENABLED_GET_SECURITY_STATE,
+                       security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_SET_PASSPHRASE:
+               set_bit(CXL_SEC_ENABLED_SET_PASSPHRASE,
+                       security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_DISABLE_PASSPHRASE:
+               set_bit(CXL_SEC_ENABLED_DISABLE_PASSPHRASE,
+                       security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_UNLOCK:
+               set_bit(CXL_SEC_ENABLED_UNLOCK, security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_FREEZE_SECURITY:
+               set_bit(CXL_SEC_ENABLED_FREEZE_SECURITY,
+                       security->enabled_cmds);
+               break;
+       case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
+               set_bit(CXL_SEC_ENABLED_PASSPHRASE_SECURE_ERASE,
+                       security->enabled_cmds);
+               break;
+       default:
+               break;
+       }
+}
+
 static bool cxl_is_poison_command(u16 opcode)
 {
 #define CXL_MBOX_OP_POISON_CMDS 0x43
@@ -677,7 +716,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
                u16 opcode = le16_to_cpu(cel_entry[i].opcode);
                struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
 
-               if (!cmd && !cxl_is_poison_command(opcode)) {
+               if (!cmd && !cxl_is_poison_command(opcode) &&
+                   !cxl_is_security_command(opcode)) {
                        dev_dbg(dev,
                                "Opcode 0x%04x unsupported by driver\n", opcode);
                        continue;
@@ -689,6 +729,9 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
                if (cxl_is_poison_command(opcode))
                        cxl_set_poison_cmd_enabled(&mds->poison, opcode);
 
+               if (cxl_is_security_command(opcode))
+                       cxl_set_security_cmd_enabled(&mds->security, opcode);
+
                dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode);
        }
 }
index f99e7ec..14b547c 100644 (file)
@@ -477,9 +477,28 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
        .attrs = cxl_memdev_pmem_attributes,
 };
 
+static umode_t cxl_memdev_security_visible(struct kobject *kobj,
+                                          struct attribute *a, int n)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+       if (a == &dev_attr_security_sanitize.attr &&
+           !test_bit(CXL_SEC_ENABLED_SANITIZE, mds->security.enabled_cmds))
+               return 0;
+
+       if (a == &dev_attr_security_erase.attr &&
+           !test_bit(CXL_SEC_ENABLED_SECURE_ERASE, mds->security.enabled_cmds))
+               return 0;
+
+       return a->mode;
+}
+
 static struct attribute_group cxl_memdev_security_attribute_group = {
        .name = "security",
        .attrs = cxl_memdev_security_attributes,
+       .is_visible = cxl_memdev_security_visible,
 };
 
 static const struct attribute_group *cxl_memdev_attribute_groups[] = {
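
This is the standard sysfs idiom for conditionally exposing attributes: the
attribute list stays static and complete, and .is_visible prunes entries per
device when the group is registered -- returning 0 hides the file, returning
a->mode keeps its default permissions. Generic shape (the capability test is
a hypothetical stand-in):

	static umode_t ex_visible(struct kobject *kobj, struct attribute *a, int n)
	{
		struct device *dev = kobj_to_dev(kobj);

		/* ex_supported() stands in for a per-device capability check */
		return ex_supported(dev, a) ? a->mode : 0;
	}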
index 79e99c8..706f8a6 100644 (file)
@@ -244,6 +244,19 @@ enum poison_cmd_enabled_bits {
        CXL_POISON_ENABLED_MAX
 };
 
+/* Device enabled security commands */
+enum security_cmd_enabled_bits {
+       CXL_SEC_ENABLED_SANITIZE,
+       CXL_SEC_ENABLED_SECURE_ERASE,
+       CXL_SEC_ENABLED_GET_SECURITY_STATE,
+       CXL_SEC_ENABLED_SET_PASSPHRASE,
+       CXL_SEC_ENABLED_DISABLE_PASSPHRASE,
+       CXL_SEC_ENABLED_UNLOCK,
+       CXL_SEC_ENABLED_FREEZE_SECURITY,
+       CXL_SEC_ENABLED_PASSPHRASE_SECURE_ERASE,
+       CXL_SEC_ENABLED_MAX
+};
+
 /**
  * struct cxl_poison_state - Driver poison state info
  *
@@ -323,7 +336,7 @@ struct cxl_mbox_activate_fw {
 
 /* FW state bits */
 #define CXL_FW_STATE_BITS              32
-#define CXL_FW_CANCEL          BIT(0)
+#define CXL_FW_CANCEL                  0
 
 /**
  * struct cxl_fw_state - Firmware upload / activation state
@@ -346,6 +359,7 @@ struct cxl_fw_state {
  * struct cxl_security_state - Device security state
  *
  * @state: state of last security operation
+ * @enabled_cmds: All security commands enabled in the CEL
  * @poll: polling for sanitization is enabled, device has no mbox irq support
  * @poll_tmo_secs: polling timeout
  * @poll_dwork: polling work item
@@ -353,6 +367,7 @@ struct cxl_fw_state {
  */
 struct cxl_security_state {
        unsigned long state;
+       DECLARE_BITMAP(enabled_cmds, CXL_SEC_ENABLED_MAX);
        bool poll;
        int poll_tmo_secs;
        struct delayed_work poll_dwork;
@@ -434,6 +449,7 @@ struct cxl_dev_state {
  * @next_persistent_bytes: persistent capacity change pending device reset
  * @event: event log driver state
  * @poison: poison driver state info
+ * @security: security driver state info
  * @fw: firmware upload / activation state
  * @mbox_send: @dev specific transport for transmitting mailbox commands
  *
index e36cbb9..474d818 100644 (file)
@@ -472,10 +472,11 @@ static void devfreq_monitor(struct work_struct *work)
  * devfreq_monitor_start() - Start load monitoring of devfreq instance
  * @devfreq:   the devfreq instance.
  *
- * Helper function for starting devfreq device load monitoring. By
- * default delayed work based monitoring is supported. Function
- * to be called from governor in response to DEVFREQ_GOV_START
- * event when device is added to devfreq framework.
+ * Helper function for starting devfreq device load monitoring. By default,
+ * deferrable timer is used for load monitoring. But the users can change this
+ * behavior using the "timer" type in devfreq_dev_profile. This function will be
+ * called by devfreq governor in response to the DEVFREQ_GOV_START event
+ * generated while adding a device to the devfreq framework.
  */
 void devfreq_monitor_start(struct devfreq *devfreq)
 {
@@ -763,6 +764,7 @@ static void devfreq_dev_release(struct device *dev)
                dev_pm_opp_put_opp_table(devfreq->opp_table);
 
        mutex_destroy(&devfreq->lock);
+       srcu_cleanup_notifier_head(&devfreq->transition_notifier_list);
        kfree(devfreq);
 }
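
As the reworded devfreq_monitor_start() kerneldoc above notes, the default
deferrable timer can be overridden through the device profile. A minimal
sketch (field values illustrative):

	static struct devfreq_dev_profile ex_profile = {
		.polling_ms	= 100,
		/* delayed, i.e. non-deferrable: keep sampling on idle CPUs */
		.timer		= DEVFREQ_TIMER_DELAYED,
	};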
 
index a727067..86850b7 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/devfreq.h>
 #include <linux/device.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/pm_opp.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
index 1663697..e134849 100644 (file)
@@ -3,9 +3,9 @@
  * Copyright 2019 NXP
  */
 
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/devfreq.h>
 #include <linux/pm_opp.h>
index 6354622..83a73f0 100644 (file)
@@ -8,7 +8,6 @@
 #include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/regulator/consumer.h>
index 503376b..4a4f010 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 #include <linux/irq.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_opp.h>
 #include <linux/reset.h>
index 7002bca..c625bb2 100644 (file)
@@ -66,18 +66,36 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
 {
        struct dma_fence_array *result;
        struct dma_fence *tmp, **array;
+       ktime_t timestamp;
        unsigned int i;
        size_t count;
 
        count = 0;
+       timestamp = ns_to_ktime(0);
        for (i = 0; i < num_fences; ++i) {
-               dma_fence_unwrap_for_each(tmp, &iter[i], fences[i])
-                       if (!dma_fence_is_signaled(tmp))
+               dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
+                       if (!dma_fence_is_signaled(tmp)) {
                                ++count;
+                       } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT,
+                                           &tmp->flags)) {
+                               if (ktime_after(tmp->timestamp, timestamp))
+                                       timestamp = tmp->timestamp;
+                       } else {
+                               /*
+                                * Use the current time if the fence is
+                                * currently signaling.
+                                */
+                               timestamp = ktime_get();
+                       }
+               }
        }
 
+       /*
+        * If we couldn't find a pending fence just return a private signaled
+        * fence with the timestamp of the last signaled one.
+        */
        if (count == 0)
-               return dma_fence_get_stub();
+               return dma_fence_allocate_private_stub(timestamp);
 
        array = kmalloc_array(count, sizeof(*array), GFP_KERNEL);
        if (!array)
@@ -138,7 +156,7 @@ restart:
        } while (tmp);
 
        if (count == 0) {
-               tmp = dma_fence_get_stub();
+               tmp = dma_fence_allocate_private_stub(ktime_get());
                goto return_tmp;
        }
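
Both call sites now preserve signaling-time information: when every input
fence has already signaled, the merge returns a private stub stamped with the
latest input timestamp (or the current time while an input is still
mid-signaling) instead of the shared global stub, whose timestamp would be
unrelated to the inputs. The selection rule, condensed from the loop above:

	if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &tmp->flags)) {
		if (ktime_after(tmp->timestamp, timestamp))
			timestamp = tmp->timestamp;	/* latest signaled */
	} else {
		timestamp = ktime_get();	/* still signaling: use "now" */
	}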
 
index f177c56..8aa8f8c 100644 (file)
@@ -150,16 +150,17 @@ EXPORT_SYMBOL(dma_fence_get_stub);
 
 /**
  * dma_fence_allocate_private_stub - return a private, signaled fence
+ * @timestamp: timestamp when the fence was signaled
  *
  * Return a newly allocated and signaled stub fence.
  */
-struct dma_fence *dma_fence_allocate_private_stub(void)
+struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp)
 {
        struct dma_fence *fence;
 
        fence = kzalloc(sizeof(*fence), GFP_KERNEL);
        if (fence == NULL)
-               return ERR_PTR(-ENOMEM);
+               return NULL;
 
        dma_fence_init(fence,
                       &dma_fence_stub_ops,
@@ -169,7 +170,7 @@ struct dma_fence *dma_fence_allocate_private_stub(void)
        set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
                &fence->flags);
 
-       dma_fence_signal(fence);
+       dma_fence_signal_timestamp(fence, timestamp);
 
        return fence;
 }
index b6f71eb..38b4110 100644 (file)
@@ -571,6 +571,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
        dma_resv_for_each_fence_unlocked(&cursor, fence) {
 
                if (dma_resv_iter_is_restarted(&cursor)) {
+                       struct dma_fence **new_fences;
                        unsigned int count;
 
                        while (*num_fences)
@@ -579,13 +580,17 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
                        count = cursor.num_fences + 1;
 
                        /* Eventually re-allocate the array */
-                       *fences = krealloc_array(*fences, count,
-                                                sizeof(void *),
-                                                GFP_KERNEL);
-                       if (count && !*fences) {
+                       new_fences = krealloc_array(*fences, count,
+                                                   sizeof(void *),
+                                                   GFP_KERNEL);
+                       if (count && !new_fences) {
+                               kfree(*fences);
+                               *fences = NULL;
+                               *num_fences = 0;
                                dma_resv_iter_end(&cursor);
                                return -ENOMEM;
                        }
+                       *fences = new_fences;
                }
 
                (*fences)[(*num_fences)++] = dma_fence_get(fence);
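
This is the classic realloc leak: assigning krealloc_array()'s result straight back to *fences discards the only pointer to the old buffer when the allocation fails. A userspace analogue of the corrected pattern; freeing the old buffer on failure mirrors this caller's contract (the array comes back empty on -ENOMEM), where plain userspace code might instead keep it:

    #include <stdlib.h>

    static int grow(int **arr, size_t *len, size_t new_len)
    {
            int *tmp = realloc(*arr, new_len * sizeof(**arr));

            if (!tmp) {
                    free(*arr);     /* release and reset, as the fix does */
                    *arr = NULL;
                    *len = 0;
                    return -1;      /* -ENOMEM in the kernel version */
            }
            *arr = tmp;
            *len = new_len;
            return 0;
    }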
index 63f0aeb..f0a3527 100644 (file)
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
  */
 static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
 {
+       LIST_HEAD(signalled);
        struct sync_pt *pt, *next;
 
        trace_sync_timeline(obj);
@@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
                if (!timeline_fence_signaled(&pt->base))
                        break;
 
-               list_del_init(&pt->link);
+               dma_fence_get(&pt->base);
+
+               list_move_tail(&pt->link, &signalled);
                rb_erase(&pt->node, &obj->pt_tree);
 
-               /*
-                * A signal callback may release the last reference to this
-                * fence, causing it to be freed. That operation has to be
-                * last to avoid a use after free inside this loop, and must
-                * be after we remove the fence from the timeline in order to
-                * prevent deadlocking on timeline->lock inside
-                * timeline_fence_release().
-                */
                dma_fence_signal_locked(&pt->base);
        }
 
        spin_unlock_irq(&obj->lock);
+
+       list_for_each_entry_safe(pt, next, &signalled, link) {
+               list_del_init(&pt->link);
+               dma_fence_put(&pt->base);
+       }
 }
 
 /**
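
The shape of that fix: each signaled point is pinned with an extra reference and moved to a private list while the timeline lock is held, and the references are dropped only after the unlock, so a release callback can no longer free the fence mid-loop or deadlock trying to retake the same lock. A compact pthread sketch of the pattern (toy list and types, not the sw_sync structures):

    #include <pthread.h>
    #include <stdlib.h>

    struct item { struct item *next; int refs; };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct item *active;

    static void put_item(struct item *it)
    {
            if (--it->refs == 0)
                    free(it);
    }

    static void signal_all(void)
    {
            struct item *batch = NULL, *it;

            pthread_mutex_lock(&lock);
            while ((it = active) != NULL) {
                    active = it->next;
                    it->refs++;             /* pin it across the unlock */
                    it->next = batch;
                    batch = it;
                    /* ... signal it while still holding the lock ... */
            }
            pthread_mutex_unlock(&lock);

            while ((it = batch) != NULL) {  /* final puts run lock-free */
                    batch = it->next;
                    put_item(it);
            }
    }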
index 644c188..08fdd0e 100644 (file)
@@ -211,6 +211,7 @@ config FSL_DMA
 config FSL_EDMA
        tristate "Freescale eDMA engine support"
        depends on OF
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
@@ -280,6 +281,7 @@ config IMX_SDMA
 
 config INTEL_IDMA64
        tristate "Intel integrated DMA 64-bit support"
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
index 5abbcc6..9a15f0d 100644 (file)
@@ -384,9 +384,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
        wq->threshold = 0;
        wq->priority = 0;
        wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
-       clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
-       clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
-       clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+       wq->flags = 0;
        memset(wq->name, 0, WQ_NAME_SIZE);
        wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
        idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
index ebd8733..9413fad 100644 (file)
@@ -190,7 +190,13 @@ static int mcf_edma_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       chans = pdata->dma_channels;
+       if (!pdata->dma_channels) {
+               dev_info(&pdev->dev, "setting default channel number to 64");
+               chans = 64;
+       } else {
+               chans = pdata->dma_channels;
+       }
+
        len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
        mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
        if (!mcf_edma)
@@ -202,11 +208,6 @@ static int mcf_edma_probe(struct platform_device *pdev)
        mcf_edma->drvdata = &mcf_data;
        mcf_edma->big_endian = 1;
 
-       if (!mcf_edma->n_chans) {
-               dev_info(&pdev->dev, "setting default channel number to 64");
-               mcf_edma->n_chans = 64;
-       }
-
        mutex_init(&mcf_edma->fsl_edma_mutex);
 
        mcf_edma->membase = devm_platform_ioremap_resource(pdev, 0);
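
The point of moving this block is ordering: the default channel count must be applied before it feeds the allocation size, otherwise a zero pdata->dma_channels sizes the devm_kzalloc() for zero channels and the later default writes past it. In miniature (illustrative types):

    #include <stddef.h>

    struct edma { size_t n_chans; /* followed by per-channel slots */ };

    static size_t alloc_len(size_t requested)
    {
            size_t chans = requested ? requested : 64;  /* default first */

            return sizeof(struct edma) + chans * sizeof(int);
    }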
index 95a462a..b6e0ac8 100644 (file)
@@ -192,7 +192,7 @@ struct owl_dma_pchan {
 };
 
 /**
- * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
+ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
  * @vc: wrapped virtual channel
  * @pchan: the physical channel utilized by this channel
  * @txd: active transaction on this channel
index b4731fe..3cf0b38 100644 (file)
@@ -404,6 +404,12 @@ enum desc_status {
         */
        BUSY,
        /*
+        * Pause was called while descriptor was BUSY. Due to hardware
+        * limitations, only termination is possible for descriptors
+        * that have been paused.
+        */
+       PAUSED,
+       /*
         * Sitting on the channel work_list but xfer done
         * by PL330 core
         */
@@ -2041,7 +2047,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
        list_for_each_entry(desc, &pch->work_list, node) {
 
                /* If already submitted */
-               if (desc->status == BUSY)
+               if (desc->status == BUSY || desc->status == PAUSED)
                        continue;
 
                ret = pl330_submit_req(pch->thread, desc);
@@ -2326,6 +2332,7 @@ static int pl330_pause(struct dma_chan *chan)
 {
        struct dma_pl330_chan *pch = to_pchan(chan);
        struct pl330_dmac *pl330 = pch->dmac;
+       struct dma_pl330_desc *desc;
        unsigned long flags;
 
        pm_runtime_get_sync(pl330->ddma.dev);
@@ -2335,6 +2342,10 @@ static int pl330_pause(struct dma_chan *chan)
        _stop(pch->thread);
        spin_unlock(&pl330->lock);
 
+       list_for_each_entry(desc, &pch->work_list, node) {
+               if (desc->status == BUSY)
+                       desc->status = PAUSED;
+       }
        spin_unlock_irqrestore(&pch->lock, flags);
        pm_runtime_mark_last_busy(pl330->ddma.dev);
        pm_runtime_put_autosuspend(pl330->ddma.dev);
@@ -2425,7 +2436,7 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                else if (running && desc == running)
                        transferred =
                                pl330_get_current_xferred_count(pch, desc);
-               else if (desc->status == BUSY)
+               else if (desc->status == BUSY || desc->status == PAUSED)
                        /*
                         * Busy but not running means either just enqueued,
                         * or finished and not yet marked done
@@ -2442,6 +2453,9 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
                        case DONE:
                                ret = DMA_COMPLETE;
                                break;
+                       case PAUSED:
+                               ret = DMA_PAUSED;
+                               break;
                        case PREP:
                        case BUSY:
                                ret = DMA_IN_PROGRESS;
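
A rough sketch of the state handling this adds: a PAUSED descriptor is skipped by the submit path and reported to dmaengine as DMA_PAUSED, and only termination can follow, per the hardware limitation noted above. Toy enum and helpers, not the driver's definitions:

    enum toy_status { PREP, BUSY, PAUSED, DONE };

    static int should_submit(enum toy_status s)
    {
            return s != BUSY && s != PAUSED;   /* skip in-flight and paused */
    }

    static const char *tx_status(enum toy_status s)
    {
            switch (s) {
            case DONE:   return "DMA_COMPLETE";
            case PAUSED: return "DMA_PAUSED";
            default:     return "DMA_IN_PROGRESS";
            }
    }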
index 93ee298..e0bfd12 100644 (file)
@@ -668,6 +668,8 @@ static int xdma_set_vector_reg(struct xdma_device *xdev, u32 vec_tbl_start,
                        val |= irq_start << shift;
                        irq_start++;
                        irq_num--;
+                       if (!irq_num)
+                               break;
                }
 
                /* write IRQ register */
@@ -715,7 +717,7 @@ static int xdma_irq_init(struct xdma_device *xdev)
                ret = request_irq(irq, xdma_channel_isr, 0,
                                  "xdma-c2h-channel", &xdev->c2h_chans[j]);
                if (ret) {
-                       xdma_err(xdev, "H2C channel%d request irq%d failed: %d",
+                       xdma_err(xdev, "C2H channel%d request irq%d failed: %d",
                                 j, irq, ret);
                        goto failed_init_c2h;
                }
@@ -892,7 +894,7 @@ static int xdma_probe(struct platform_device *pdev)
        }
 
        reg_base = devm_ioremap_resource(&pdev->dev, res);
-       if (!reg_base) {
+       if (IS_ERR(reg_base)) {
                xdma_err(xdev, "ioremap failed");
                goto failed;
        }
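
Two of these hunks are worth unpacking. devm_ioremap_resource() returns ERR_PTR() on failure and never NULL, so the old !reg_base test could not fire; IS_ERR() is the correct check. The vector-table hunk adds a guard so the inner packing loop stops exactly when the IRQ count runs out; a standalone sketch of that loop (hypothetical names):

    #include <stdint.h>

    static void pack_vectors(uint32_t *regs, uint32_t irq_start,
                             unsigned int irq_num)
    {
            while (irq_num) {
                    uint32_t val = 0;

                    for (int shift = 0; shift < 32; shift += 8) {
                            val |= irq_start++ << shift;
                            if (!--irq_num)
                                    break;          /* the added guard */
                    }
                    *regs++ = val;
            }
    }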
index 597dae7..9b6642d 100644 (file)
@@ -4150,6 +4150,20 @@ static int per_family_init(struct amd64_pvt *pvt)
                }
                break;
 
+       case 0x1A:
+               switch (pvt->model) {
+               case 0x00 ... 0x1f:
+                       pvt->ctl_name           = "F1Ah";
+                       pvt->max_mcs            = 12;
+                       pvt->flags.zn_regs_v2   = 1;
+                       break;
+               case 0x40 ... 0x4f:
+                       pvt->ctl_name           = "F1Ah_M40h";
+                       pvt->flags.zn_regs_v2   = 1;
+                       break;
+               }
+               break;
+
        default:
                amd64_err("Unsupported family!\n");
                return -ENODEV;
@@ -4344,6 +4358,7 @@ static const struct x86_cpu_id amd64_cpuids[] = {
        X86_MATCH_VENDOR_FAM(AMD,       0x17, NULL),
        X86_MATCH_VENDOR_FAM(HYGON,     0x18, NULL),
        X86_MATCH_VENDOR_FAM(AMD,       0x19, NULL),
+       X86_MATCH_VENDOR_FAM(AMD,       0x1A, NULL),
        { }
 };
 MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids);
index a897b6a..5abf997 100644 (file)
@@ -906,7 +906,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = {
        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,  X86_STEPPINGS(0x0, 0xf), &spr_cfg),
        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X,   X86_STEPPINGS(0x0, 0xf), &spr_cfg),
        X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X,   X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
-       X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SIERRAFOREST_X,    X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
+       X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X,  X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
index 713582c..33f0ba1 100644 (file)
@@ -60,7 +60,7 @@ static void __init eisa_name_device(struct eisa_device *edev)
        int i;
        for (i = 0; i < EISA_INFOS; i++) {
                if (!strcmp(edev->id.sig, eisa_table[i].id.sig)) {
-                       strlcpy(edev->pretty_name,
+                       strscpy(edev->pretty_name,
                                eisa_table[i].name,
                                sizeof(edev->pretty_name));
                        return;
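
strlcpy() is deprecated in the kernel partly because it returns strlen(src), which means reading the whole source even when it is not NUL-terminated; strscpy() bounds the read, always NUL-terminates, and returns the copied length or -E2BIG. A userspace approximation of those semantics, for illustration only:

    #include <string.h>

    #define E2BIG 7

    static long toy_strscpy(char *dst, const char *src, size_t size)
    {
            size_t len;

            if (!size)
                    return -E2BIG;
            len = strnlen(src, size);       /* bounded read, unlike strlcpy */
            if (len == size) {              /* source won't fit: truncate */
                    len = size - 1;
                    memcpy(dst, src, len);
                    dst[len] = '\0';
                    return -E2BIG;
            }
            memcpy(dst, src, len + 1);      /* includes the NUL */
            return (long)len;
    }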
index 1efa5e9..19246ed 100644 (file)
@@ -166,8 +166,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
                return -ENOMEM;
 
        shmem = of_parse_phandle(cdev->of_node, "shmem", idx);
-       if (!of_device_is_compatible(shmem, "arm,scmi-shmem"))
+       if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) {
+               of_node_put(shmem);
                return -ENXIO;
+       }
 
        ret = of_address_to_resource(shmem, 0, &res);
        of_node_put(shmem);
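
of_parse_phandle() hands back the node with an elevated refcount, so the early -ENXIO return leaked it; the fix pairs every exit path with of_node_put(), which is NULL-safe. The discipline in miniature (toy types):

    struct node { int refcount; };

    static void node_put(struct node *n)
    {
            if (n)
                    n->refcount--;          /* NULL-safe, like of_node_put() */
    }

    static int setup(struct node *shmem, int is_compatible)
    {
            if (!is_compatible) {
                    node_put(shmem);        /* the added put on the error path */
                    return -6;              /* -ENXIO in the driver */
            }
            /* ... use shmem ... */
            node_put(shmem);                /* the pre-existing put */
            return 0;
    }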
index 6971dcf..0493aa3 100644 (file)
@@ -818,10 +818,13 @@ static ssize_t scmi_dbg_raw_mode_common_write(struct file *filp,
         * before sending it with a single RAW xfer.
         */
        if (rd->tx_size < rd->tx_req_size) {
-               size_t cnt;
+               ssize_t cnt;
 
                cnt = simple_write_to_buffer(rd->tx.buf, rd->tx.len, ppos,
                                             buf, count);
+               if (cnt < 0)
+                       return cnt;
+
                rd->tx_size += cnt;
                if (cnt < count)
                        return cnt;
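
simple_write_to_buffer() can return a negative errno, and the old size_t local silently converted that into a huge positive count. A self-contained demonstration of the signedness trap:

    #include <stdio.h>
    #include <sys/types.h>

    static ssize_t pretend_write(void) { return -14; }   /* -EFAULT */

    int main(void)
    {
            size_t  bad  = pretend_write();   /* wraps to a huge count */
            ssize_t good = pretend_write();

            if (good < 0)                     /* only the signed copy can tell */
                    printf("error %zd; unsigned copy saw %zu\n", good, bad);
            return 0;
    }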
index 621c37e..c193516 100644 (file)
@@ -40,6 +40,7 @@
 /**
  * struct scmi_smc - Structure representing a SCMI smc transport
  *
+ * @irq: An optional IRQ for completion
  * @cinfo: SCMI channel info
  * @shmem: Transmit/Receive shared memory area
  * @shmem_lock: Lock to protect access to Tx/Rx shared memory area.
@@ -52,6 +53,7 @@
  */
 
 struct scmi_smc {
+       int irq;
        struct scmi_chan_info *cinfo;
        struct scmi_shared_mem __iomem *shmem;
        /* Protect access to shmem area */
@@ -127,7 +129,7 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
        struct resource res;
        struct device_node *np;
        u32 func_id;
-       int ret, irq;
+       int ret;
 
        if (!tx)
                return -ENODEV;
@@ -137,8 +139,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
                return -ENOMEM;
 
        np = of_parse_phandle(cdev->of_node, "shmem", 0);
-       if (!of_device_is_compatible(np, "arm,scmi-shmem"))
+       if (!of_device_is_compatible(np, "arm,scmi-shmem")) {
+               of_node_put(np);
                return -ENXIO;
+       }
 
        ret = of_address_to_resource(np, 0, &res);
        of_node_put(np);
@@ -167,11 +171,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
         * completion of a message is signaled by an interrupt rather than by
         * the return of the SMC call.
         */
-       irq = of_irq_get_byname(cdev->of_node, "a2p");
-       if (irq > 0) {
-               ret = devm_request_irq(dev, irq, smc_msg_done_isr,
-                                      IRQF_NO_SUSPEND,
-                                      dev_name(dev), scmi_info);
+       scmi_info->irq = of_irq_get_byname(cdev->of_node, "a2p");
+       if (scmi_info->irq > 0) {
+               ret = request_irq(scmi_info->irq, smc_msg_done_isr,
+                                 IRQF_NO_SUSPEND, dev_name(dev), scmi_info);
                if (ret) {
                        dev_err(dev, "failed to setup SCMI smc irq\n");
                        return ret;
@@ -193,6 +196,10 @@ static int smc_chan_free(int id, void *p, void *data)
        struct scmi_chan_info *cinfo = p;
        struct scmi_smc *scmi_info = cinfo->transport_info;
 
+       /* Ignore any possible further reception on the IRQ path */
+       if (scmi_info->irq > 0)
+               free_irq(scmi_info->irq, scmi_info);
+
        cinfo->transport_info = NULL;
        scmi_info->cinfo = NULL;
 
index f9040bd..285fe7a 100644 (file)
@@ -1095,3 +1095,22 @@ int sdei_event_handler(struct pt_regs *regs,
        return err;
 }
 NOKPROBE_SYMBOL(sdei_event_handler);
+
+void sdei_handler_abort(void)
+{
+       /*
+        * If the crash happened in an SDEI event handler then we need to
+        * finish the handler with the firmware so that we can have working
+        * interrupts in the crash kernel.
+        */
+       if (__this_cpu_read(sdei_active_critical_event)) {
+               pr_warn("still in SDEI critical event context, attempting to finish handler.\n");
+               __sdei_handler_abort();
+               __this_cpu_write(sdei_active_critical_event, NULL);
+       }
+       if (__this_cpu_read(sdei_active_normal_event)) {
+               pr_warn("still in SDEI normal event context, attempting to finish handler.\n");
+               __sdei_handler_abort();
+               __this_cpu_write(sdei_active_normal_event, NULL);
+       }
+}
index 16d64a3..92389a5 100644 (file)
@@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB)        += efi-stub.o string.o intrinsics.o systable.o \
 lib-$(CONFIG_ARM)              += arm32-stub.o
 lib-$(CONFIG_ARM64)            += arm64.o arm64-stub.o smbios.o
 lib-$(CONFIG_X86)              += x86-stub.o
+lib-$(CONFIG_X86_64)           += x86-5lvl.o
 lib-$(CONFIG_RISCV)            += riscv.o riscv-stub.o
 lib-$(CONFIG_LOONGARCH)                += loongarch.o loongarch-stub.o
 
@@ -146,7 +147,7 @@ STUBCOPY_RELOC-$(CONFIG_ARM64)      := R_AARCH64_ABS
 
 # For RISC-V, we don't need anything special other than arm64. Keep all the
 # symbols in .init section and make sure that no absolute symbol references
-# doesn't exist.
+# exist.
 STUBCOPY_FLAGS-$(CONFIG_RISCV) += --prefix-alloc-sections=.init \
                                   --prefix-symbols=__efistub_
 STUBCOPY_RELOC-$(CONFIG_RISCV) := R_RISCV_HI20
index 770b8ec..8c40fc8 100644 (file)
@@ -106,7 +106,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
                 */
                status = efi_random_alloc(*reserve_size, min_kimg_align,
                                          reserve_addr, phys_seed,
-                                         EFI_LOADER_CODE);
+                                         EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
                if (status != EFI_SUCCESS)
                        efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
        } else {
index 7329842..bfa3062 100644 (file)
@@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline)
                        efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
                } else if (!strcmp(param, "noinitrd")) {
                        efi_noinitrd = true;
+               } else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+                       efi_no5lvl = true;
                } else if (!strcmp(param, "efi") && val) {
                        efi_nochunk = parse_option_str(val, "nochunk");
                        efi_novamap |= parse_option_str(val, "novamap");
index 6aa38a1..9823f6f 100644 (file)
@@ -33,6 +33,7 @@
 #define EFI_ALLOC_LIMIT                ULONG_MAX
 #endif
 
+extern bool efi_no5lvl;
 extern bool efi_nochunk;
 extern bool efi_nokaslr;
 extern int efi_loglevel;
@@ -955,7 +956,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
 
 efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
                              unsigned long *addr, unsigned long random_seed,
-                             int memory_type);
+                             int memory_type, unsigned long alloc_limit);
 
 efi_status_t efi_random_get_seed(void);
 
index 32c7a54..674a064 100644 (file)
@@ -16,7 +16,8 @@
  */
 static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
                                         unsigned long size,
-                                        unsigned long align_shift)
+                                        unsigned long align_shift,
+                                        u64 alloc_limit)
 {
        unsigned long align = 1UL << align_shift;
        u64 first_slot, last_slot, region_end;
@@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
                return 0;
 
        region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
-                        (u64)EFI_ALLOC_LIMIT);
+                        alloc_limit);
        if (region_end < size)
                return 0;
 
@@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size,
                              unsigned long align,
                              unsigned long *addr,
                              unsigned long random_seed,
-                             int memory_type)
+                             int memory_type,
+                             unsigned long alloc_limit)
 {
        unsigned long total_slots = 0, target_slot;
        unsigned long total_mirrored_slots = 0;
@@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size,
                efi_memory_desc_t *md = (void *)map->map + map_offset;
                unsigned long slots;
 
-               slots = get_entry_num_slots(md, size, ilog2(align));
+               slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit);
                MD_NUM_SLOTS(md) = slots;
                total_slots += slots;
                if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
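
The new alloc_limit parameter replaces the single compile-time EFI_ALLOC_LIMIT clamp, so each caller can cap the random placement independently; every memory-map region is clipped against the limit before its slots are counted. The arithmetic in isolation (illustrative helper):

    #include <stdint.h>

    static uint64_t usable_end(uint64_t phys, uint64_t pages,
                               uint64_t page_size, uint64_t limit)
    {
            uint64_t end = phys + pages * page_size - 1;

            return end < limit ? end : limit;   /* min(region end, limit) */
    }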
diff --git a/drivers/firmware/efi/libstub/x86-5lvl.c b/drivers/firmware/efi/libstub/x86-5lvl.c
new file mode 100644 (file)
index 0000000..479dd44
--- /dev/null
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/efi.h>
+
+#include <asm/boot.h>
+#include <asm/desc.h>
+#include <asm/efi.h>
+
+#include "efistub.h"
+#include "x86-stub.h"
+
+bool efi_no5lvl;
+
+static void (*la57_toggle)(void *cr3);
+
+static const struct desc_struct gdt[] = {
+       [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+       [GDT_ENTRY_KERNEL_CS]   = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+};
+
+/*
+ * Enabling (or disabling) 5 level paging is tricky, because it can only be
+ * done from 32-bit mode with paging disabled. This means not only that the
+ * code itself must be running from 32-bit addressable physical memory, but
+ * also that the root page table must be 32-bit addressable, as programming
+ * a 64-bit value into CR3 when running in 32-bit mode is not supported.
+ */
+efi_status_t efi_setup_5level_paging(void)
+{
+       u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
+       efi_status_t status;
+       u8 *la57_code;
+
+       if (!efi_is_64bit())
+               return EFI_SUCCESS;
+
+       /* check for 5 level paging support */
+       if (native_cpuid_eax(0) < 7 ||
+           !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
+               return EFI_SUCCESS;
+
+       /* allocate some 32-bit addressable memory for code and a page table */
+       status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
+                                   U32_MAX);
+       if (status != EFI_SUCCESS)
+               return status;
+
+       la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
+       memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
+
+       /*
+        * To avoid the need to allocate a 32-bit addressable stack, the
+        * trampoline uses a LJMP instruction to switch back to long mode.
+        * LJMP takes an absolute destination address, which needs to be
+        * fixed up at runtime.
+        */
+       *(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
+
+       efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
+
+       return EFI_SUCCESS;
+}
+
+void efi_5level_switch(void)
+{
+       bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
+       bool have_la57 = native_read_cr4() & X86_CR4_LA57;
+       bool need_toggle = want_la57 ^ have_la57;
+       u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
+       u64 *cr3 = (u64 *)__native_read_cr3();
+       u64 *new_cr3;
+
+       if (!la57_toggle || !need_toggle)
+               return;
+
+       if (!have_la57) {
+               /*
+                * 5 level paging will be enabled, so a root level page needs
+                * to be allocated from the 32-bit addressable physical region,
+                * with its first entry referring to the existing hierarchy.
+                */
+               new_cr3 = memset(pgt, 0, PAGE_SIZE);
+               new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
+       } else {
+               /* take the new root table pointer from the current entry #0 */
+               new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
+
+               /* copy the new root table if it is not 32-bit addressable */
+               if ((u64)new_cr3 > U32_MAX)
+                       new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
+       }
+
+       native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
+
+       la57_toggle(new_cr3);
+}
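
The fixup line in efi_setup_5level_paging() is a one-instruction relocation: the template's LJMP carries a 0-based absolute operand, and adding the copy's base address makes it point into the relocated trampoline. The trick in isolation (names are illustrative; memcpy is used to sidestep alignment concerns):

    #include <stdint.h>
    #include <string.h>

    static void relocate(uint8_t *dst, const uint8_t *tmpl, size_t tmpl_size,
                         uint16_t imm_off)
    {
            uint32_t imm;

            memcpy(dst, tmpl, tmpl_size);
            memcpy(&imm, dst + imm_off, sizeof(imm));   /* 0-based operand */
            imm += (uint32_t)(uintptr_t)dst;            /* rebase to the copy */
            memcpy(dst + imm_off, &imm, sizeof(imm));
    }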
index 220be75..2fee52e 100644 (file)
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/boot.h>
+#include <asm/kaslr.h>
+#include <asm/sev.h>
 
 #include "efistub.h"
-
-/* Maximum physical address for 64-bit kernel with 4-level paging */
-#define MAXMEM_X86_64_4LEVEL (1ull << 46)
+#include "x86-stub.h"
 
 const efi_system_table_t *efi_system_table;
 const efi_dxe_services_table_t *efi_dxe_table;
-u32 image_offset __section(".data");
 static efi_loaded_image_t *image = NULL;
+static efi_memory_attribute_protocol_t *memattr;
 
 typedef union sev_memory_acceptance_protocol sev_memory_acceptance_protocol_t;
 union sev_memory_acceptance_protocol {
@@ -72,7 +72,7 @@ preserve_pci_rom_image(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom)
        rom->data.type  = SETUP_PCI;
        rom->data.len   = size - sizeof(struct setup_data);
        rom->data.next  = 0;
-       rom->pcilen     = pci->romsize;
+       rom->pcilen     = romsize;
        *__rom = rom;
 
        status = efi_call_proto(pci, pci.read, EfiPciIoWidthUint16,
@@ -223,8 +223,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
        }
 }
 
-static void
-adjust_memory_range_protection(unsigned long start, unsigned long size)
+void efi_adjust_memory_range_protection(unsigned long start,
+                                       unsigned long size)
 {
        efi_status_t status;
        efi_gcd_memory_space_desc_t desc;
@@ -232,12 +232,18 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
        unsigned long rounded_start, rounded_end;
        unsigned long unprotect_start, unprotect_size;
 
-       if (efi_dxe_table == NULL)
-               return;
-
        rounded_start = rounddown(start, EFI_PAGE_SIZE);
        rounded_end = roundup(start + size, EFI_PAGE_SIZE);
 
+       if (memattr != NULL) {
+               efi_call_proto(memattr, clear_memory_attributes, rounded_start,
+                              rounded_end - rounded_start, EFI_MEMORY_XP);
+               return;
+       }
+
+       if (efi_dxe_table == NULL)
+               return;
+
        /*
         * Don't modify memory region attributes, they are
         * already suitable, to lower the possibility to
@@ -278,49 +284,6 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
        }
 }
 
-/*
- * Trampoline takes 2 pages and can be loaded in first megabyte of memory
- * with its end placed between 128k and 640k where BIOS might start.
- * (see arch/x86/boot/compressed/pgtable_64.c)
- *
- * We cannot find exact trampoline placement since memory map
- * can be modified by UEFI, and it can alter the computed address.
- */
-
-#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
-#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
-
-void startup_32(struct boot_params *boot_params);
-
-static void
-setup_memory_protection(unsigned long image_base, unsigned long image_size)
-{
-       /*
-        * Allow execution of possible trampoline used
-        * for switching between 4- and 5-level page tables
-        * and relocated kernel image.
-        */
-
-       adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
-                                      TRAMPOLINE_PLACEMENT_SIZE);
-
-#ifdef CONFIG_64BIT
-       if (image_base != (unsigned long)startup_32)
-               adjust_memory_range_protection(image_base, image_size);
-#else
-       /*
-        * Clear protection flags on a whole range of possible
-        * addresses used for KASLR. We don't need to do that
-        * on x86_64, since KASLR/extraction is performed after
-        * dedicated identity page tables are built and we only
-        * need to remove possible protection on relocated image
-        * itself disregarding further relocations.
-        */
-       adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
-                                      KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
-#endif
-}
-
 static void setup_unaccepted_memory(void)
 {
        efi_guid_t mem_acceptance_proto = OVMF_SEV_MEMORY_ACCEPTANCE_PROTOCOL_GUID;
@@ -346,9 +309,7 @@ static void setup_unaccepted_memory(void)
 
 static const efi_char16_t apple[] = L"Apple";
 
-static void setup_quirks(struct boot_params *boot_params,
-                        unsigned long image_base,
-                        unsigned long image_size)
+static void setup_quirks(struct boot_params *boot_params)
 {
        efi_char16_t *fw_vendor = (efi_char16_t *)(unsigned long)
                efi_table_attr(efi_system_table, fw_vendor);
@@ -357,9 +318,6 @@ static void setup_quirks(struct boot_params *boot_params,
                if (IS_ENABLED(CONFIG_APPLE_PROPERTIES))
                        retrieve_apple_device_properties(boot_params);
        }
-
-       if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES))
-               setup_memory_protection(image_base, image_size);
 }
 
 /*
@@ -512,7 +470,6 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
        }
 
        image_base = efi_table_attr(image, image_base);
-       image_offset = (void *)startup_32 - image_base;
 
        status = efi_allocate_pages(sizeof(struct boot_params),
                                    (unsigned long *)&boot_params, ULONG_MAX);
@@ -803,19 +760,96 @@ static efi_status_t exit_boot(struct boot_params *boot_params, void *handle)
        return EFI_SUCCESS;
 }
 
+static bool have_unsupported_snp_features(void)
+{
+       u64 unsupported;
+
+       unsupported = snp_get_unsupported_features(sev_get_status());
+       if (unsupported) {
+               efi_err("Unsupported SEV-SNP features detected: 0x%llx\n",
+                       unsupported);
+               return true;
+       }
+       return false;
+}
+
+static void efi_get_seed(void *seed, int size)
+{
+       efi_get_random_bytes(size, seed);
+
+       /*
+        * This only updates seed[0] when running on 32-bit, but in that case,
+        * seed[1] is not used anyway, as there is no virtual KASLR on 32-bit.
+        */
+       *(unsigned long *)seed ^= kaslr_get_random_long("EFI");
+}
+
+static void error(char *str)
+{
+       efi_warn("Decompression failed: %s\n", str);
+}
+
+static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry)
+{
+       unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+       unsigned long addr, alloc_size, entry;
+       efi_status_t status;
+       u32 seed[2] = {};
+
+       /* determine the required size of the allocation */
+       alloc_size = ALIGN(max_t(unsigned long, output_len, kernel_total_size),
+                          MIN_KERNEL_ALIGN);
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && !efi_nokaslr) {
+               u64 range = KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR - kernel_total_size;
+
+               efi_get_seed(seed, sizeof(seed));
+
+               virt_addr += (range * seed[1]) >> 32;
+               virt_addr &= ~(CONFIG_PHYSICAL_ALIGN - 1);
+       }
+
+       status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr,
+                                 seed[0], EFI_LOADER_CODE,
+                                 EFI_X86_KERNEL_ALLOC_LIMIT);
+       if (status != EFI_SUCCESS)
+               return status;
+
+       entry = decompress_kernel((void *)addr, virt_addr, error);
+       if (entry == ULONG_MAX) {
+               efi_free(alloc_size, addr);
+               return EFI_LOAD_ERROR;
+       }
+
+       *kernel_entry = addr + entry;
+
+       efi_adjust_memory_range_protection(addr, kernel_total_size);
+
+       return EFI_SUCCESS;
+}
+
+static void __noreturn enter_kernel(unsigned long kernel_addr,
+                                   struct boot_params *boot_params)
+{
+       /* enter decompressed kernel with boot_params pointer in RSI/ESI */
+       asm("jmp *%0"::"r"(kernel_addr), "S"(boot_params));
+
+       unreachable();
+}
+
 /*
- * On success, we return the address of startup_32, which has potentially been
- * relocated by efi_relocate_kernel.
- * On failure, we exit to the firmware via efi_exit instead of returning.
+ * On success, this routine will jump to the relocated image directly and never
+ * return.  On failure, it will exit to the firmware via efi_exit() instead of
+ * returning.
  */
-asmlinkage unsigned long efi_main(efi_handle_t handle,
-                                 efi_system_table_t *sys_table_arg,
-                                 struct boot_params *boot_params)
+void __noreturn efi_stub_entry(efi_handle_t handle,
+                              efi_system_table_t *sys_table_arg,
+                              struct boot_params *boot_params)
 {
-       unsigned long bzimage_addr = (unsigned long)startup_32;
-       unsigned long buffer_start, buffer_end;
+       efi_guid_t guid = EFI_MEMORY_ATTRIBUTE_PROTOCOL_GUID;
        struct setup_header *hdr = &boot_params->hdr;
        const struct linux_efi_initrd *initrd = NULL;
+       unsigned long kernel_entry;
        efi_status_t status;
 
        efi_system_table = sys_table_arg;
@@ -823,65 +857,25 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
        if (efi_system_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
                efi_exit(handle, EFI_INVALID_PARAMETER);
 
-       efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
-       if (efi_dxe_table &&
-           efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
-               efi_warn("Ignoring DXE services table: invalid signature\n");
-               efi_dxe_table = NULL;
+       if (have_unsupported_snp_features())
+               efi_exit(handle, EFI_UNSUPPORTED);
+
+       if (IS_ENABLED(CONFIG_EFI_DXE_MEM_ATTRIBUTES)) {
+               efi_dxe_table = get_efi_config_table(EFI_DXE_SERVICES_TABLE_GUID);
+               if (efi_dxe_table &&
+                   efi_dxe_table->hdr.signature != EFI_DXE_SERVICES_TABLE_SIGNATURE) {
+                       efi_warn("Ignoring DXE services table: invalid signature\n");
+                       efi_dxe_table = NULL;
+               }
        }
 
-       /*
-        * If the kernel isn't already loaded at a suitable address,
-        * relocate it.
-        *
-        * It must be loaded above LOAD_PHYSICAL_ADDR.
-        *
-        * The maximum address for 64-bit is 1 << 46 for 4-level paging. This
-        * is defined as the macro MAXMEM, but unfortunately that is not a
-        * compile-time constant if 5-level paging is configured, so we instead
-        * define our own macro for use here.
-        *
-        * For 32-bit, the maximum address is complicated to figure out, for
-        * now use KERNEL_IMAGE_SIZE, which will be 512MiB, the same as what
-        * KASLR uses.
-        *
-        * Also relocate it if image_offset is zero, i.e. the kernel wasn't
-        * loaded by LoadImage, but rather by a bootloader that called the
-        * handover entry. The reason we must always relocate in this case is
-        * to handle the case of systemd-boot booting a unified kernel image,
-        * which is a PE executable that contains the bzImage and an initrd as
-        * COFF sections. The initrd section is placed after the bzImage
-        * without ensuring that there are at least init_size bytes available
-        * for the bzImage, and thus the compressed kernel's startup code may
-        * overwrite the initrd unless it is moved out of the way.
-        */
+       /* grab the memory attributes protocol if it exists */
+       efi_bs_call(locate_protocol, &guid, NULL, (void **)&memattr);
 
-       buffer_start = ALIGN(bzimage_addr - image_offset,
-                            hdr->kernel_alignment);
-       buffer_end = buffer_start + hdr->init_size;
-
-       if ((buffer_start < LOAD_PHYSICAL_ADDR)                              ||
-           (IS_ENABLED(CONFIG_X86_32) && buffer_end > KERNEL_IMAGE_SIZE)    ||
-           (IS_ENABLED(CONFIG_X86_64) && buffer_end > MAXMEM_X86_64_4LEVEL) ||
-           (image_offset == 0)) {
-               extern char _bss[];
-
-               status = efi_relocate_kernel(&bzimage_addr,
-                                            (unsigned long)_bss - bzimage_addr,
-                                            hdr->init_size,
-                                            hdr->pref_address,
-                                            hdr->kernel_alignment,
-                                            LOAD_PHYSICAL_ADDR);
-               if (status != EFI_SUCCESS) {
-                       efi_err("efi_relocate_kernel() failed!\n");
-                       goto fail;
-               }
-               /*
-                * Now that we've copied the kernel elsewhere, we no longer
-                * have a set up block before startup_32(), so reset image_offset
-                * to zero in case it was set earlier.
-                */
-               image_offset = 0;
+       status = efi_setup_5level_paging();
+       if (status != EFI_SUCCESS) {
+               efi_err("efi_setup_5level_paging() failed!\n");
+               goto fail;
        }
 
 #ifdef CONFIG_CMDLINE_BOOL
@@ -901,6 +895,12 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
                }
        }
 
+       status = efi_decompress_kernel(&kernel_entry);
+       if (status != EFI_SUCCESS) {
+               efi_err("Failed to decompress kernel\n");
+               goto fail;
+       }
+
        /*
         * At this point, an initrd may already have been loaded by the
         * bootloader and passed via bootparams. We permit an initrd loaded
@@ -940,7 +940,7 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
 
        setup_efi_pci(boot_params);
 
-       setup_quirks(boot_params, bzimage_addr, buffer_end - buffer_start);
+       setup_quirks(boot_params);
 
        setup_unaccepted_memory();
 
@@ -950,9 +950,38 @@ asmlinkage unsigned long efi_main(efi_handle_t handle,
                goto fail;
        }
 
-       return bzimage_addr;
+       /*
+        * Call the SEV init code while still running with the firmware's
+        * GDT/IDT, so #VC exceptions will be handled by EFI.
+        */
+       sev_enable(boot_params);
+
+       efi_5level_switch();
+
+       enter_kernel(kernel_entry, boot_params);
 fail:
-       efi_err("efi_main() failed!\n");
+       efi_err("efi_stub_entry() failed!\n");
 
        efi_exit(handle, status);
 }
+
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+void efi_handover_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+                       struct boot_params *boot_params)
+{
+       extern char _bss[], _ebss[];
+
+       memset(_bss, 0, _ebss - _bss);
+       efi_stub_entry(handle, sys_table_arg, boot_params);
+}
+
+#ifndef CONFIG_EFI_MIXED
+extern __alias(efi_handover_entry)
+void efi32_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+                     struct boot_params *boot_params);
+
+extern __alias(efi_handover_entry)
+void efi64_stub_entry(efi_handle_t handle, efi_system_table_t *sys_table_arg,
+                     struct boot_params *boot_params);
+#endif
+#endif
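
One detail of the new in-stub KASLR step: the virtual offset is drawn by multiply-shift, (range * r32) >> 32, which maps a 32-bit random value uniformly into [0, range) without a modulo, then masks down to the required alignment. In isolation (assumes range fits in 32 bits so the product cannot overflow):

    #include <stdint.h>

    static uint64_t pick_offset(uint64_t range, uint32_t r32, uint64_t align)
    {
            uint64_t off = (range * (uint64_t)r32) >> 32;   /* in [0, range) */

            return off & ~(align - 1);     /* mirrors the ~(ALIGN - 1) mask */
    }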
diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h
new file mode 100644 (file)
index 0000000..37c5a36
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/efi.h>
+
+extern void trampoline_32bit_src(void *, bool);
+extern const u16 trampoline_ljmp_imm_offset;
+
+void efi_adjust_memory_range_protection(unsigned long start,
+                                       unsigned long size);
+
+#ifdef CONFIG_X86_64
+efi_status_t efi_setup_5level_paging(void);
+void efi_5level_switch(void);
+#else
+static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
+static inline void efi_5level_switch(void) {}
+#endif
index e5d7fa1..bdb17ea 100644 (file)
@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
                }
 
                status = efi_random_alloc(alloc_size, min_kimg_align, &image_base,
-                                         seed, EFI_LOADER_CODE);
+                                         seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
                if (status != EFI_SUCCESS) {
                        efi_err("Failed to allocate memory\n");
                        goto free_cmdline;
index d0daacd..09525fb 100644 (file)
@@ -130,14 +130,25 @@ static int __init riscv_enable_runtime_services(void)
 }
 early_initcall(riscv_enable_runtime_services);
 
-void efi_virtmap_load(void)
+static void efi_virtmap_load(void)
 {
        preempt_disable();
        switch_mm(current->active_mm, &efi_mm, NULL);
 }
 
-void efi_virtmap_unload(void)
+static void efi_virtmap_unload(void)
 {
        switch_mm(&efi_mm, current->active_mm, NULL);
        preempt_enable();
 }
+
+void arch_efi_call_virt_setup(void)
+{
+       sync_kernel_mappings(efi_mm.pgd);
+       efi_virtmap_load();
+}
+
+void arch_efi_call_virt_teardown(void)
+{
+       efi_virtmap_unload();
+}
index a400c43..5d56bc4 100644 (file)
  * code doesn't get too cluttered:
  */
 #define efi_call_virt(f, args...)   \
-       efi_call_virt_pointer(efi.runtime, f, args)
-#define __efi_call_virt(f, args...) \
-       __efi_call_virt_pointer(efi.runtime, f, args)
+       arch_efi_call_virt(efi.runtime, f, args)
+
+union efi_rts_args {
+       struct {
+               efi_time_t      *time;
+               efi_time_cap_t  *capabilities;
+       } GET_TIME;
+
+       struct {
+               efi_time_t      *time;
+       } SET_TIME;
+
+       struct {
+               efi_bool_t      *enabled;
+               efi_bool_t      *pending;
+               efi_time_t      *time;
+       } GET_WAKEUP_TIME;
+
+       struct {
+               efi_bool_t      enable;
+               efi_time_t      *time;
+       } SET_WAKEUP_TIME;
+
+       struct {
+               efi_char16_t    *name;
+               efi_guid_t      *vendor;
+               u32             *attr;
+               unsigned long   *data_size;
+               void            *data;
+       } GET_VARIABLE;
+
+       struct {
+               unsigned long   *name_size;
+               efi_char16_t    *name;
+               efi_guid_t      *vendor;
+       } GET_NEXT_VARIABLE;
+
+       struct {
+               efi_char16_t    *name;
+               efi_guid_t      *vendor;
+               u32             attr;
+               unsigned long   data_size;
+               void            *data;
+       } SET_VARIABLE;
+
+       struct {
+               u32             attr;
+               u64             *storage_space;
+               u64             *remaining_space;
+               u64             *max_variable_size;
+       } QUERY_VARIABLE_INFO;
+
+       struct {
+               u32             *high_count;
+       } GET_NEXT_HIGH_MONO_COUNT;
+
+       struct {
+               efi_capsule_header_t **capsules;
+               unsigned long   count;
+               unsigned long   sg_list;
+       } UPDATE_CAPSULE;
+
+       struct {
+               efi_capsule_header_t **capsules;
+               unsigned long   count;
+               u64             *max_size;
+               int             *reset_type;
+       } QUERY_CAPSULE_CAPS;
+
+       struct {
+               efi_status_t    (__efiapi *acpi_prm_handler)(u64, void *);
+               u64             param_buffer_addr;
+               void            *context;
+       } ACPI_PRM_HANDLER;
+};
 
 struct efi_runtime_work efi_rts_work;
 
 /*
- * efi_queue_work:     Queue efi_runtime_service() and wait until it's done
- * @rts:               efi_runtime_service() function identifier
- * @rts_arg<1-5>:      efi_runtime_service() function arguments
+ * efi_queue_work:     Queue EFI runtime service call and wait for completion
+ * @_rts:              EFI runtime service function identifier
+ * @_args:             Arguments to pass to the EFI runtime service
  *
  * Accesses to efi_runtime_services() are serialized by a binary
  * semaphore (efi_runtime_lock) and caller waits until the work is
  * finished, hence _only_ one work is queued at a time and the caller
  * thread waits for completion.
  */
-#define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5)                \
-({                                                                     \
-       efi_rts_work.status = EFI_ABORTED;                              \
-                                                                       \
-       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {                       \
-               pr_warn_once("EFI Runtime Services are disabled!\n");   \
-               efi_rts_work.status = EFI_DEVICE_ERROR;                 \
-               goto exit;                                              \
-       }                                                               \
-                                                                       \
-       init_completion(&efi_rts_work.efi_rts_comp);                    \
-       INIT_WORK(&efi_rts_work.work, efi_call_rts);                    \
-       efi_rts_work.arg1 = _arg1;                                      \
-       efi_rts_work.arg2 = _arg2;                                      \
-       efi_rts_work.arg3 = _arg3;                                      \
-       efi_rts_work.arg4 = _arg4;                                      \
-       efi_rts_work.arg5 = _arg5;                                      \
-       efi_rts_work.efi_rts_id = _rts;                                 \
-                                                                       \
-       /*                                                              \
-        * queue_work() returns 0 if work was already on queue,         \
-        * _ideally_ this should never happen.                          \
-        */                                                             \
-       if (queue_work(efi_rts_wq, &efi_rts_work.work))                 \
-               wait_for_completion(&efi_rts_work.efi_rts_comp);        \
-       else                                                            \
-               pr_err("Failed to queue work to efi_rts_wq.\n");        \
-                                                                       \
-       WARN_ON_ONCE(efi_rts_work.status == EFI_ABORTED);               \
-exit:                                                                  \
-       efi_rts_work.efi_rts_id = EFI_NONE;                             \
-       efi_rts_work.status;                                            \
-})
+#define efi_queue_work(_rts, _args...)                                 \
+       __efi_queue_work(EFI_ ## _rts,                                  \
+                        &(union efi_rts_args){ ._rts = { _args }})
 
 #ifndef arch_efi_save_flags
 #define arch_efi_save_flags(state_flags)       local_save_flags(state_flags)
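
The rewritten efi_queue_work() leans on a compound literal of union efi_rts_args with a designated initializer whose member is picked by token pasting, so each call site passes typed arguments instead of five void pointers. A standalone illustration of the same technique with hypothetical names (GNU-style named variadic macro, as the kernel uses):

    #include <stdio.h>

    union op_args {
            struct { int a, b; }          ADD;
            struct { const char *text; }  PRINT;
    };

    enum op_id { OP_ADD, OP_PRINT };

    static long dispatch(enum op_id id, const union op_args *args)
    {
            switch (id) {
            case OP_ADD:   return args->ADD.a + args->ADD.b;
            case OP_PRINT: return printf("%s\n", args->PRINT.text);
            }
            return -1;
    }

    #define queue_op(_op, _args...) \
            dispatch(OP_ ## _op, &(union op_args){ ._op = { _args } })

    int main(void)
    {
            long sum = queue_op(ADD, 2, 3);        /* args->ADD = {2, 3} */
            queue_op(PRINT, "hello");
            return (int)sum;
    }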
@@ -103,7 +145,7 @@ unsigned long efi_call_virt_save_flags(void)
        return flags;
 }
 
-void efi_call_virt_check_flags(unsigned long flags, const char *call)
+void efi_call_virt_check_flags(unsigned long flags, const void *caller)
 {
        unsigned long cur_flags, mismatch;
 
@@ -114,8 +156,8 @@ void efi_call_virt_check_flags(unsigned long flags, const char *call)
                return;
 
        add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_NOW_UNRELIABLE);
-       pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI %s\n",
-                          flags, cur_flags, call);
+       pr_err_ratelimited(FW_BUG "IRQ flags corrupted (0x%08lx=>0x%08lx) by EFI call from %pS\n",
+                          flags, cur_flags, caller ?: __builtin_return_address(0));
        arch_efi_restore_flags(flags);
 }
 
@@ -170,74 +212,90 @@ extern struct semaphore __efi_uv_runtime_lock __alias(efi_runtime_lock);
 /*
  * Calls the appropriate efi_runtime_service() with the appropriate
  * arguments.
- *
- * Semantics followed by efi_call_rts() to understand efi_runtime_work:
- * 1. If argument was a pointer, recast it from void pointer to original
- * pointer type.
- * 2. If argument was a value, recast it from void pointer to original
- * pointer type and dereference it.
  */
 static void efi_call_rts(struct work_struct *work)
 {
-       void *arg1, *arg2, *arg3, *arg4, *arg5;
+       const union efi_rts_args *args = efi_rts_work.args;
        efi_status_t status = EFI_NOT_FOUND;
+       unsigned long flags;
 
-       arg1 = efi_rts_work.arg1;
-       arg2 = efi_rts_work.arg2;
-       arg3 = efi_rts_work.arg3;
-       arg4 = efi_rts_work.arg4;
-       arg5 = efi_rts_work.arg5;
+       arch_efi_call_virt_setup();
+       flags = efi_call_virt_save_flags();
 
        switch (efi_rts_work.efi_rts_id) {
        case EFI_GET_TIME:
-               status = efi_call_virt(get_time, (efi_time_t *)arg1,
-                                      (efi_time_cap_t *)arg2);
+               status = efi_call_virt(get_time,
+                                      args->GET_TIME.time,
+                                      args->GET_TIME.capabilities);
                break;
        case EFI_SET_TIME:
-               status = efi_call_virt(set_time, (efi_time_t *)arg1);
+               status = efi_call_virt(set_time,
+                                      args->SET_TIME.time);
                break;
        case EFI_GET_WAKEUP_TIME:
-               status = efi_call_virt(get_wakeup_time, (efi_bool_t *)arg1,
-                                      (efi_bool_t *)arg2, (efi_time_t *)arg3);
+               status = efi_call_virt(get_wakeup_time,
+                                      args->GET_WAKEUP_TIME.enabled,
+                                      args->GET_WAKEUP_TIME.pending,
+                                      args->GET_WAKEUP_TIME.time);
                break;
        case EFI_SET_WAKEUP_TIME:
-               status = efi_call_virt(set_wakeup_time, *(efi_bool_t *)arg1,
-                                      (efi_time_t *)arg2);
+               status = efi_call_virt(set_wakeup_time,
+                                      args->SET_WAKEUP_TIME.enable,
+                                      args->SET_WAKEUP_TIME.time);
                break;
        case EFI_GET_VARIABLE:
-               status = efi_call_virt(get_variable, (efi_char16_t *)arg1,
-                                      (efi_guid_t *)arg2, (u32 *)arg3,
-                                      (unsigned long *)arg4, (void *)arg5);
+               status = efi_call_virt(get_variable,
+                                      args->GET_VARIABLE.name,
+                                      args->GET_VARIABLE.vendor,
+                                      args->GET_VARIABLE.attr,
+                                      args->GET_VARIABLE.data_size,
+                                      args->GET_VARIABLE.data);
                break;
        case EFI_GET_NEXT_VARIABLE:
-               status = efi_call_virt(get_next_variable, (unsigned long *)arg1,
-                                      (efi_char16_t *)arg2,
-                                      (efi_guid_t *)arg3);
+               status = efi_call_virt(get_next_variable,
+                                      args->GET_NEXT_VARIABLE.name_size,
+                                      args->GET_NEXT_VARIABLE.name,
+                                      args->GET_NEXT_VARIABLE.vendor);
                break;
        case EFI_SET_VARIABLE:
-               status = efi_call_virt(set_variable, (efi_char16_t *)arg1,
-                                      (efi_guid_t *)arg2, *(u32 *)arg3,
-                                      *(unsigned long *)arg4, (void *)arg5);
+               status = efi_call_virt(set_variable,
+                                      args->SET_VARIABLE.name,
+                                      args->SET_VARIABLE.vendor,
+                                      args->SET_VARIABLE.attr,
+                                      args->SET_VARIABLE.data_size,
+                                      args->SET_VARIABLE.data);
                break;
        case EFI_QUERY_VARIABLE_INFO:
-               status = efi_call_virt(query_variable_info, *(u32 *)arg1,
-                                      (u64 *)arg2, (u64 *)arg3, (u64 *)arg4);
+               status = efi_call_virt(query_variable_info,
+                                      args->QUERY_VARIABLE_INFO.attr,
+                                      args->QUERY_VARIABLE_INFO.storage_space,
+                                      args->QUERY_VARIABLE_INFO.remaining_space,
+                                      args->QUERY_VARIABLE_INFO.max_variable_size);
                break;
        case EFI_GET_NEXT_HIGH_MONO_COUNT:
-               status = efi_call_virt(get_next_high_mono_count, (u32 *)arg1);
+               status = efi_call_virt(get_next_high_mono_count,
+                                      args->GET_NEXT_HIGH_MONO_COUNT.high_count);
                break;
        case EFI_UPDATE_CAPSULE:
                status = efi_call_virt(update_capsule,
-                                      (efi_capsule_header_t **)arg1,
-                                      *(unsigned long *)arg2,
-                                      *(unsigned long *)arg3);
+                                      args->UPDATE_CAPSULE.capsules,
+                                      args->UPDATE_CAPSULE.count,
+                                      args->UPDATE_CAPSULE.sg_list);
                break;
        case EFI_QUERY_CAPSULE_CAPS:
                status = efi_call_virt(query_capsule_caps,
-                                      (efi_capsule_header_t **)arg1,
-                                      *(unsigned long *)arg2, (u64 *)arg3,
-                                      (int *)arg4);
+                                      args->QUERY_CAPSULE_CAPS.capsules,
+                                      args->QUERY_CAPSULE_CAPS.count,
+                                      args->QUERY_CAPSULE_CAPS.max_size,
+                                      args->QUERY_CAPSULE_CAPS.reset_type);
                break;
+       case EFI_ACPI_PRM_HANDLER:
+#ifdef CONFIG_ACPI_PRMT
+               status = arch_efi_call_virt(args, ACPI_PRM_HANDLER.acpi_prm_handler,
+                                           args->ACPI_PRM_HANDLER.param_buffer_addr,
+                                           args->ACPI_PRM_HANDLER.context);
+               break;
+#endif
        default:
                /*
                 * Ideally, we should never reach here because a caller of this
@@ -246,17 +304,53 @@ static void efi_call_rts(struct work_struct *work)
                 */
                pr_err("Requested executing invalid EFI Runtime Service.\n");
        }
+
+       efi_call_virt_check_flags(flags, efi_rts_work.caller);
+       arch_efi_call_virt_teardown();
+
        efi_rts_work.status = status;
        complete(&efi_rts_work.efi_rts_comp);
 }
 
+static efi_status_t __efi_queue_work(enum efi_rts_ids id,
+                                    union efi_rts_args *args)
+{
+       efi_rts_work.efi_rts_id = id;
+       efi_rts_work.args = args;
+       efi_rts_work.caller = __builtin_return_address(0);
+       efi_rts_work.status = EFI_ABORTED;
+
+       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+               pr_warn_once("EFI Runtime Services are disabled!\n");
+               efi_rts_work.status = EFI_DEVICE_ERROR;
+               goto exit;
+       }
+
+       init_completion(&efi_rts_work.efi_rts_comp);
+       INIT_WORK(&efi_rts_work.work, efi_call_rts);
+
+       /*
+        * queue_work() returns 0 if work was already on queue,
+        * _ideally_ this should never happen.
+        */
+       if (queue_work(efi_rts_wq, &efi_rts_work.work))
+               wait_for_completion(&efi_rts_work.efi_rts_comp);
+       else
+               pr_err("Failed to queue work to efi_rts_wq.\n");
+
+       WARN_ON_ONCE(efi_rts_work.status == EFI_ABORTED);
+exit:
+       efi_rts_work.efi_rts_id = EFI_NONE;
+       return efi_rts_work.status;
+}
+
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
 {
        efi_status_t status;
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_GET_TIME, tm, tc, NULL, NULL, NULL);
+       status = efi_queue_work(GET_TIME, tm, tc);
        up(&efi_runtime_lock);
        return status;
 }
@@ -267,7 +361,7 @@ static efi_status_t virt_efi_set_time(efi_time_t *tm)
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_SET_TIME, tm, NULL, NULL, NULL, NULL);
+       status = efi_queue_work(SET_TIME, tm);
        up(&efi_runtime_lock);
        return status;
 }
@@ -280,8 +374,7 @@ static efi_status_t virt_efi_get_wakeup_time(efi_bool_t *enabled,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_GET_WAKEUP_TIME, enabled, pending, tm, NULL,
-                               NULL);
+       status = efi_queue_work(GET_WAKEUP_TIME, enabled, pending, tm);
        up(&efi_runtime_lock);
        return status;
 }
@@ -292,8 +385,7 @@ static efi_status_t virt_efi_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm)
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_SET_WAKEUP_TIME, &enabled, tm, NULL, NULL,
-                               NULL);
+       status = efi_queue_work(SET_WAKEUP_TIME, enabled, tm);
        up(&efi_runtime_lock);
        return status;
 }
@@ -308,7 +400,7 @@ static efi_status_t virt_efi_get_variable(efi_char16_t *name,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_GET_VARIABLE, name, vendor, attr, data_size,
+       status = efi_queue_work(GET_VARIABLE, name, vendor, attr, data_size,
                                data);
        up(&efi_runtime_lock);
        return status;
@@ -322,8 +414,7 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_GET_NEXT_VARIABLE, name_size, name, vendor,
-                               NULL, NULL);
+       status = efi_queue_work(GET_NEXT_VARIABLE, name_size, name, vendor);
        up(&efi_runtime_lock);
        return status;
 }
@@ -338,24 +429,23 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_SET_VARIABLE, name, vendor, &attr, &data_size,
+       status = efi_queue_work(SET_VARIABLE, name, vendor, attr, data_size,
                                data);
        up(&efi_runtime_lock);
        return status;
 }
 
 static efi_status_t
-virt_efi_set_variable_nonblocking(efi_char16_t *name, efi_guid_t *vendor,
-                                 u32 attr, unsigned long data_size,
-                                 void *data)
+virt_efi_set_variable_nb(efi_char16_t *name, efi_guid_t *vendor, u32 attr,
+                        unsigned long data_size, void *data)
 {
        efi_status_t status;
 
        if (down_trylock(&efi_runtime_lock))
                return EFI_NOT_READY;
 
-       status = efi_call_virt(set_variable, name, vendor, attr, data_size,
-                              data);
+       status = efi_call_virt_pointer(efi.runtime, set_variable, name, vendor,
+                                      attr, data_size, data);
        up(&efi_runtime_lock);
        return status;
 }
@@ -373,17 +463,15 @@ static efi_status_t virt_efi_query_variable_info(u32 attr,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_QUERY_VARIABLE_INFO, &attr, storage_space,
-                               remaining_space, max_variable_size, NULL);
+       status = efi_queue_work(QUERY_VARIABLE_INFO, attr, storage_space,
+                               remaining_space, max_variable_size);
        up(&efi_runtime_lock);
        return status;
 }
 
 static efi_status_t
-virt_efi_query_variable_info_nonblocking(u32 attr,
-                                        u64 *storage_space,
-                                        u64 *remaining_space,
-                                        u64 *max_variable_size)
+virt_efi_query_variable_info_nb(u32 attr, u64 *storage_space,
+                               u64 *remaining_space, u64 *max_variable_size)
 {
        efi_status_t status;
 
@@ -393,8 +481,9 @@ virt_efi_query_variable_info_nonblocking(u32 attr,
        if (down_trylock(&efi_runtime_lock))
                return EFI_NOT_READY;
 
-       status = efi_call_virt(query_variable_info, attr, storage_space,
-                              remaining_space, max_variable_size);
+       status = efi_call_virt_pointer(efi.runtime, query_variable_info, attr,
+                                      storage_space, remaining_space,
+                                      max_variable_size);
        up(&efi_runtime_lock);
        return status;
 }
@@ -405,8 +494,7 @@ static efi_status_t virt_efi_get_next_high_mono_count(u32 *count)
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_GET_NEXT_HIGH_MONO_COUNT, count, NULL, NULL,
-                               NULL, NULL);
+       status = efi_queue_work(GET_NEXT_HIGH_MONO_COUNT, count);
        up(&efi_runtime_lock);
        return status;
 }
@@ -421,8 +509,13 @@ static void virt_efi_reset_system(int reset_type,
                        "could not get exclusive access to the firmware\n");
                return;
        }
+
+       arch_efi_call_virt_setup();
        efi_rts_work.efi_rts_id = EFI_RESET_SYSTEM;
-       __efi_call_virt(reset_system, reset_type, status, data_size, data);
+       arch_efi_call_virt(efi.runtime, reset_system, reset_type, status,
+                          data_size, data);
+       arch_efi_call_virt_teardown();
+
        up(&efi_runtime_lock);
 }
 
@@ -437,8 +530,7 @@ static efi_status_t virt_efi_update_capsule(efi_capsule_header_t **capsules,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_UPDATE_CAPSULE, capsules, &count, &sg_list,
-                               NULL, NULL);
+       status = efi_queue_work(UPDATE_CAPSULE, capsules, count, sg_list);
        up(&efi_runtime_lock);
        return status;
 }
@@ -455,26 +547,44 @@ static efi_status_t virt_efi_query_capsule_caps(efi_capsule_header_t **capsules,
 
        if (down_interruptible(&efi_runtime_lock))
                return EFI_ABORTED;
-       status = efi_queue_work(EFI_QUERY_CAPSULE_CAPS, capsules, &count,
-                               max_size, reset_type, NULL);
+       status = efi_queue_work(QUERY_CAPSULE_CAPS, capsules, count,
+                               max_size, reset_type);
        up(&efi_runtime_lock);
        return status;
 }
 
-void efi_native_runtime_setup(void)
+void __init efi_native_runtime_setup(void)
 {
-       efi.get_time = virt_efi_get_time;
-       efi.set_time = virt_efi_set_time;
-       efi.get_wakeup_time = virt_efi_get_wakeup_time;
-       efi.set_wakeup_time = virt_efi_set_wakeup_time;
-       efi.get_variable = virt_efi_get_variable;
-       efi.get_next_variable = virt_efi_get_next_variable;
-       efi.set_variable = virt_efi_set_variable;
-       efi.set_variable_nonblocking = virt_efi_set_variable_nonblocking;
-       efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
-       efi.reset_system = virt_efi_reset_system;
-       efi.query_variable_info = virt_efi_query_variable_info;
-       efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nonblocking;
-       efi.update_capsule = virt_efi_update_capsule;
-       efi.query_capsule_caps = virt_efi_query_capsule_caps;
+       efi.get_time                        = virt_efi_get_time;
+       efi.set_time                        = virt_efi_set_time;
+       efi.get_wakeup_time                 = virt_efi_get_wakeup_time;
+       efi.set_wakeup_time                 = virt_efi_set_wakeup_time;
+       efi.get_variable                    = virt_efi_get_variable;
+       efi.get_next_variable               = virt_efi_get_next_variable;
+       efi.set_variable                    = virt_efi_set_variable;
+       efi.set_variable_nonblocking        = virt_efi_set_variable_nb;
+       efi.get_next_high_mono_count        = virt_efi_get_next_high_mono_count;
+       efi.reset_system                    = virt_efi_reset_system;
+       efi.query_variable_info             = virt_efi_query_variable_info;
+       efi.query_variable_info_nonblocking = virt_efi_query_variable_info_nb;
+       efi.update_capsule                  = virt_efi_update_capsule;
+       efi.query_capsule_caps              = virt_efi_query_capsule_caps;
 }
+
+#ifdef CONFIG_ACPI_PRMT
+
+efi_status_t
+efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
+                         u64 param_buffer_addr, void *context)
+{
+       efi_status_t status;
+
+       if (down_interruptible(&efi_runtime_lock))
+               return EFI_ABORTED;
+       status = efi_queue_work(ACPI_PRM_HANDLER, handler_addr,
+                               param_buffer_addr, context);
+       up(&efi_runtime_lock);
+       return status;
+}
+
+#endif
index 890eb45..1990263 100644 (file)
@@ -34,7 +34,6 @@ static struct soc_device_attribute *soc_dev_attr;
 
 static int __init smccc_soc_init(void)
 {
-       struct arm_smccc_res res;
        int soc_id_rev, soc_id_version;
        static char soc_id_str[20], soc_id_rev_str[12];
        static char soc_id_jep106_id_str[12];
@@ -49,13 +48,13 @@ static int __init smccc_soc_init(void)
        }
 
        if (soc_id_version < 0) {
-               pr_err("ARCH_SOC_ID(0) returned error: %lx\n", res.a0);
+               pr_err("Invalid SoC Version: %x\n", soc_id_version);
                return -EINVAL;
        }
 
        soc_id_rev = arm_smccc_get_soc_id_revision();
        if (soc_id_rev < 0) {
-               pr_err("ARCH_SOC_ID(1) returned error: %lx\n", res.a0);
+               pr_err("Invalid SoC Revision: %x\n", soc_id_rev);
                return -EINVAL;
        }
 
index a68f682..6749711 100644 (file)
@@ -874,7 +874,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
 
        spin_lock_init(&mvpwm->lock);
 
-       return pwmchip_add(&mvpwm->chip);
+       return devm_pwmchip_add(dev, &mvpwm->chip);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -1112,6 +1112,13 @@ static int mvebu_gpio_probe_syscon(struct platform_device *pdev,
        return 0;
 }
 
+static void mvebu_gpio_remove_irq_domain(void *data)
+{
+       struct irq_domain *domain = data;
+
+       irq_domain_remove(domain);
+}
+
 static int mvebu_gpio_probe(struct platform_device *pdev)
 {
        struct mvebu_gpio_chip *mvchip;
@@ -1243,17 +1250,21 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        if (!mvchip->domain) {
                dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
                        mvchip->chip.label);
-               err = -ENODEV;
-               goto err_pwm;
+               return -ENODEV;
        }
 
+       err = devm_add_action_or_reset(&pdev->dev, mvebu_gpio_remove_irq_domain,
+                                      mvchip->domain);
+       if (err)
+               return err;
+
        err = irq_alloc_domain_generic_chips(
            mvchip->domain, ngpios, 2, np->name, handle_level_irq,
            IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_LEVEL, 0, 0);
        if (err) {
                dev_err(&pdev->dev, "couldn't allocate irq chips %s (DT).\n",
                        mvchip->chip.label);
-               goto err_domain;
+               return err;
        }
 
        /*
@@ -1293,13 +1304,6 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
        }
 
        return 0;
-
-err_domain:
-       irq_domain_remove(mvchip->domain);
-err_pwm:
-       pwmchip_remove(&mvchip->mvpwm->chip);
-
-       return err;
 }
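/*
 * The conversion above follows the standard devm action pattern: register
 * a teardown callback at the point the resource is created, and drop the
 * hand-rolled error labels. A condensed, self-contained sketch with
 * hypothetical names:
 */
static void example_remove_domain(void *data)
{
	irq_domain_remove(data);
}

static int example_register_domain(struct device *dev,
				   struct irq_domain *domain)
{
	/*
	 * On failure the callback runs immediately; on success it is
	 * deferred until the device is unbound, in reverse registration
	 * order relative to other devm-managed resources.
	 */
	return devm_add_action_or_reset(dev, example_remove_domain, domain);
}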
 
 static struct platform_driver mvebu_gpio_driver = {
index 8b49b0a..533d815 100644 (file)
@@ -291,6 +291,15 @@ static void gpio_sim_mutex_destroy(void *data)
        mutex_destroy(lock);
 }
 
+static void gpio_sim_dispose_mappings(void *data)
+{
+       struct gpio_sim_chip *chip = data;
+       unsigned int i;
+
+       for (i = 0; i < chip->gc.ngpio; i++)
+               irq_dispose_mapping(irq_find_mapping(chip->irq_sim, i));
+}
+
 static void gpio_sim_sysfs_remove(void *data)
 {
        struct gpio_sim_chip *chip = data;
@@ -402,10 +411,14 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
        if (!chip->pull_map)
                return -ENOMEM;
 
-       chip->irq_sim = devm_irq_domain_create_sim(dev, NULL, num_lines);
+       chip->irq_sim = devm_irq_domain_create_sim(dev, swnode, num_lines);
        if (IS_ERR(chip->irq_sim))
                return PTR_ERR(chip->irq_sim);
 
+       ret = devm_add_action_or_reset(dev, gpio_sim_dispose_mappings, chip);
+       if (ret)
+               return ret;
+
        mutex_init(&chip->lock);
        ret = devm_add_action_or_reset(dev, gpio_sim_mutex_destroy,
                                       &chip->lock);
@@ -429,6 +442,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
        gc->set_config = gpio_sim_set_config;
        gc->to_irq = gpio_sim_to_irq;
        gc->free = gpio_sim_free;
+       gc->can_sleep = true;
 
        ret = devm_gpiochip_add_data(dev, gc, chip);
        if (ret)
index aaddcab..532dead 100644 (file)
@@ -91,13 +91,13 @@ static int tps68470_gpio_output(struct gpio_chip *gc, unsigned int offset,
        struct tps68470_gpio_data *tps68470_gpio = gpiochip_get_data(gc);
        struct regmap *regmap = tps68470_gpio->tps68470_regmap;
 
+       /* Set the initial value */
+       tps68470_gpio_set(gc, offset, value);
+
        /* rest are always outputs */
        if (offset >= TPS68470_N_REGULAR_GPIO)
                return 0;
 
-       /* Set the initial value */
-       tps68470_gpio_set(gc, offset, value);
-
        return regmap_update_bits(regmap, TPS68470_GPIO_CTL_REG_A(offset),
                                 TPS68470_GPIO_MODE_MASK,
                                 TPS68470_GPIO_MODE_OUT_CMOS);
index e73885a..afb42a8 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-#define WS16C48_EXTENT 10
+#define WS16C48_EXTENT 11
 #define MAX_NUM_WS16C48 max_num_isa_dev(WS16C48_EXTENT)
 
 static unsigned int base[MAX_NUM_WS16C48];
index 530dfd1..50503a4 100644 (file)
@@ -515,8 +515,9 @@ static ssize_t unexport_store(const struct class *class,
         * they may be undone on its behalf too.
         */
        if (test_and_clear_bit(FLAG_SYSFS, &desc->flags)) {
-               status = 0;
+               gpiod_unexport(desc);
                gpiod_free(desc);
+               status = 0;
        }
 done:
        if (status)
@@ -781,8 +782,10 @@ void gpiochip_sysfs_unregister(struct gpio_device *gdev)
        mutex_unlock(&sysfs_lock);
 
        /* unregister gpiod class devices owned by sysfs */
-       for_each_gpio_desc_with_flag(chip, desc, FLAG_SYSFS)
+       for_each_gpio_desc_with_flag(chip, desc, FLAG_SYSFS) {
+               gpiod_unexport(desc);
                gpiod_free(desc);
+       }
 }
 
 static int __init gpiolib_sysfs_init(void)
index 251c875..76e0c38 100644 (file)
@@ -2167,12 +2167,18 @@ static bool gpiod_free_commit(struct gpio_desc *desc)
 
 void gpiod_free(struct gpio_desc *desc)
 {
-       if (desc && desc->gdev && gpiod_free_commit(desc)) {
-               module_put(desc->gdev->owner);
-               gpio_device_put(desc->gdev);
-       } else {
+       /*
+        * We must not use VALIDATE_DESC_VOID() as the underlying gdev->chip
+        * may already be NULL but we still want to put the references.
+        */
+       if (!desc)
+               return;
+
+       if (!gpiod_free_commit(desc))
                WARN_ON(extra_checks);
-       }
+
+       module_put(desc->gdev->owner);
+       gpio_device_put(desc->gdev);
 }
 
 /**
index 2f9c14a..6dc950c 100644 (file)
@@ -1296,6 +1296,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
 int amdgpu_device_pci_reset(struct amdgpu_device *adev);
 bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
+bool amdgpu_device_pcie_dynamic_switching_supported(void);
 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
 bool amdgpu_device_aspm_support_quirk(void);
 
index f61527b..d34c3ef 100644 (file)
@@ -1709,7 +1709,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
                        alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
                        AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
                }
-               xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
+               xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
+                                       0 : fpriv->xcp_id;
        } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
                domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
                alloc_flags = 0;
@@ -2881,6 +2882,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
                        if (!attachment->is_mapped)
                                continue;
 
+                       if (attachment->bo_va->base.bo->tbo.pin_count)
+                               continue;
+
                        kfd_mem_dmaunmap_attachment(mem, attachment);
                        ret = update_gpuvm_pte(mem, attachment, &sync_obj);
                        if (ret) {
index 040f4cb..fb78a8f 100644 (file)
@@ -295,7 +295,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 
        if (!p->gang_size) {
                ret = -EINVAL;
-               goto free_partial_kdata;
+               goto free_all_kdata;
        }
 
        for (i = 0; i < p->gang_size; ++i) {
index a92c618..6238701 100644 (file)
@@ -1458,6 +1458,51 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
        return true;
 }
 
+/*
+ * On APUs with >= 64GB of system memory, white flickering has been
+ * observed with S/G display enabled. Disable S/G on such systems until
+ * we have a proper fix.
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
+ */
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
+{
+       switch (amdgpu_sg_display) {
+       case -1:
+               break;
+       case 0:
+               return false;
+       case 1:
+               return true;
+       default:
+               return false;
+       }
+       if ((totalram_pages() << (PAGE_SHIFT - 10)) +
+           (adev->gmc.real_vram_size / 1024) >= 64000000) {
+               DRM_WARN("Disabling S/G due to >=64GB RAM\n");
+               return false;
+       }
+       return true;
+}
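/*
 * Unit check for the threshold above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12): totalram_pages() << (PAGE_SHIFT - 10) converts a
 * page count to KiB, so 64 GiB of RAM is 16777216 pages << 2 ==
 * 67108864 KiB, which already exceeds the 64000000 KiB cutoff before any
 * VRAM (real_vram_size is in bytes, hence the / 1024) is added.
 */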
+
+/*
+ * Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
+ * speed switching. Until we have confirmation from Intel that a specific
+ * host supports it, it's safer to keep it disabled for all.
+ *
+ * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2663
+ */
+bool amdgpu_device_pcie_dynamic_switching_supported(void)
+{
+#if IS_ENABLED(CONFIG_X86)
+       struct cpuinfo_x86 *c = &cpu_data(0);
+
+       if (c->x86_vendor == X86_VENDOR_INTEL)
+               return false;
+#endif
+       return true;
+}
+
 /**
  * amdgpu_device_should_use_aspm - check if the device should program ASPM
  *
@@ -3677,10 +3722,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
 {
        if (amdgpu_mcbp == 1)
                adev->gfx.mcbp = true;
-
-       if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
-           (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
-           adev->gfx.num_gfx_rings)
+       else if (amdgpu_mcbp == 0)
+               adev->gfx.mcbp = false;
+       else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
+                (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
+                adev->gfx.num_gfx_rings)
                adev->gfx.mcbp = true;
 
        if (amdgpu_sriov_vf(adev))
@@ -4348,6 +4394,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
                drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
 
        cancel_delayed_work_sync(&adev->delayed_init_work);
+       flush_delayed_work(&adev->gfx.gfx_off_delay_work);
 
        amdgpu_ras_suspend(adev);
 
index c694b41..7537f5a 100644 (file)
@@ -552,6 +552,41 @@ int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
 }
 
 /**
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring that to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if need to restore interrupts, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+       struct amdgpu_device *adev = ring->adev;
+       bool is_gfx_power_domain = false;
+
+       switch (ring->funcs->type) {
+       case AMDGPU_RING_TYPE_SDMA:
+       /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+               if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0))
+                       is_gfx_power_domain = true;
+               break;
+       case AMDGPU_RING_TYPE_GFX:
+       case AMDGPU_RING_TYPE_COMPUTE:
+       case AMDGPU_RING_TYPE_KIQ:
+       case AMDGPU_RING_TYPE_MES:
+               is_gfx_power_domain = true;
+               break;
+       default:
+               break;
+       }
+
+       return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
  * amdgpu_fence_driver_hw_fini - tear down the fence driver
  * for all possible rings.
  *
@@ -579,7 +614,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
                        amdgpu_fence_driver_force_completion(ring);
 
                if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
-                   ring->fence_drv.irq_src)
+                   ring->fence_drv.irq_src &&
+                   amdgpu_fence_need_ring_interrupt_restore(ring))
                        amdgpu_irq_put(adev, ring->fence_drv.irq_src,
                                       ring->fence_drv.irq_type);
 
@@ -655,7 +691,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
                        continue;
 
                /* enable the interrupt */
-               if (ring->fence_drv.irq_src)
+               if (ring->fence_drv.irq_src &&
+                   amdgpu_fence_need_ring_interrupt_restore(ring))
                        amdgpu_irq_get(adev, ring->fence_drv.irq_src,
                                       ring->fence_drv.irq_type);
        }
index a33d4bc..fd81b04 100644 (file)
@@ -692,15 +692,8 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 
                if (adev->gfx.gfx_off_req_count == 0 &&
                    !adev->gfx.gfx_off_state) {
-                       /* If going to s2idle, no need to wait */
-                       if (adev->in_s0ix) {
-                               if (!amdgpu_dpm_set_powergating_by_smu(adev,
-                                               AMD_IP_BLOCK_TYPE_GFX, true))
-                                       adev->gfx.gfx_off_state = true;
-                       } else {
-                               schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+                       schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
                                              delay);
-                       }
                }
        } else {
                if (adev->gfx.gfx_off_req_count == 0) {
index cca5a49..12414a7 100644 (file)
@@ -1229,13 +1229,13 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
                pasid = 0;
        }
 
-       r = amdgpu_vm_init(adev, &fpriv->vm);
+       r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
        if (r)
                goto error_pasid;
 
-       r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
+       r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
        if (r)
-               goto error_vm;
+               goto error_pasid;
 
        r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
        if (r)
index e9091eb..f808841 100644 (file)
@@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
                goto error_pasid;
        }
 
-       r = amdgpu_vm_init(adev, vm);
+       r = amdgpu_vm_init(adev, vm, -1);
        if (r) {
                DRM_ERROR("failed to initialize vm\n");
                goto error_pasid;
index 6d676bd..78d1ee7 100644 (file)
@@ -498,11 +498,11 @@ static int psp_sw_init(void *handle)
        return 0;
 
 failed2:
-       amdgpu_bo_free_kernel(&psp->fw_pri_bo,
-                             &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
-failed1:
        amdgpu_bo_free_kernel(&psp->fence_buf_bo,
                              &psp->fence_buf_mc_addr, &psp->fence_buf);
+failed1:
+       amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+                             &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
        return ret;
 }
 
index b779ee4..e1ee1c7 100644 (file)
@@ -397,7 +397,7 @@ void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
        struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
 
        WARN_ON(!ring->is_sw_ring);
-       if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+       if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
                if (amdgpu_mcbp_scan(mux) > 0)
                        amdgpu_mcbp_trigger_preempt(mux);
                return;
index 53ff91f..d0748bc 100644 (file)
@@ -55,8 +55,9 @@ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
                DRM_WARN("%s: vblank timer overrun\n", __func__);
 
        ret = drm_crtc_handle_vblank(crtc);
+       /* Don't queue timer again when vblank is disabled. */
        if (!ret)
-               DRM_ERROR("amdgpu_vkms failure on handling vblank");
+               return HRTIMER_NORESTART;
 
        return HRTIMER_RESTART;
 }
@@ -81,7 +82,7 @@ static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
 {
        struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
-       hrtimer_cancel(&amdgpu_crtc->vblank_timer);
+       hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
 }
 
 static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
index 291977b..ec1ec08 100644 (file)
@@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @xcp_id: GPU partition selection id
  *
  * Init @vm fields.
  *
  * Returns:
  * 0 for success, error for failure.
  */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
 {
        struct amdgpu_bo *root_bo;
        struct amdgpu_bo_vm *root;
@@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        vm->evicting = false;
 
        r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
-                               false, &root);
+                               false, &root, xcp_id);
        if (r)
                goto error_free_delayed;
        root_bo = &root->bo;
index 9c85d49..ffac741 100644 (file)
@@ -392,7 +392,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                        u32 pasid);
 
 long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -475,7 +475,8 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
 int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                       struct amdgpu_bo_vm *vmbo, bool immediate);
 int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                       int level, bool immediate, struct amdgpu_bo_vm **vmbo);
+                       int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+                       int32_t xcp_id);
 void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
                                struct amdgpu_vm *vm);
index dea1a64..5431332 100644 (file)
@@ -498,11 +498,12 @@ exit:
  * @level: the page table level
  * @immediate: use an immediate update
  * @vmbo: pointer to the buffer object pointer
+ * @xcp_id: GPU partition id
  */
 int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-                       int level, bool immediate, struct amdgpu_bo_vm **vmbo)
+                       int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+                       int32_t xcp_id)
 {
-       struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
        struct amdgpu_bo_param bp;
        struct amdgpu_bo *bo;
        struct dma_resv *resv;
@@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        bp.type = ttm_bo_type_kernel;
        bp.no_wait_gpu = immediate;
-       bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+       bp.xcp_id_plus1 = xcp_id + 1;
 
        if (vm->root.bo)
                bp.resv = vm->root.bo->tbo.base.resv;
@@ -561,7 +562,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
        bp.type = ttm_bo_type_kernel;
        bp.resv = bo->tbo.base.resv;
        bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-       bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+       bp.xcp_id_plus1 = xcp_id + 1;
 
        r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
 
@@ -606,7 +607,8 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
                return 0;
 
        amdgpu_vm_eviction_unlock(vm);
-       r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
+       r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
+                               vm->root.bo->xcp_id);
        amdgpu_vm_eviction_lock(vm);
        if (r)
                return r;
index d175e86..565a1fa 100644 (file)
@@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
 
        for (i = 1; i < MAX_XCP; i++) {
                ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
-               if (ret)
+               if (ret == -ENOSPC) {
+                       dev_warn(adev->dev,
+                                "Skip xcp node #%d when out of drm node resources.", i);
+                       return 0;
+               } else if (ret) {
                        return ret;
+               }
 
                /* Redirect all IOCTLs to the primary device */
                adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
@@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
                return 0;
 
        for (i = 1; i < MAX_XCP; i++) {
+               if (!adev->xcp_mgr->xcp[i].ddev)
+                       break;
+
                ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
                if (ret)
                        return ret;
@@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
                return;
 
        for (i = 1; i < MAX_XCP; i++) {
+               if (!adev->xcp_mgr->xcp[i].ddev)
+                       break;
+
                p_ddev = adev->xcp_mgr->xcp[i].ddev;
                drm_dev_unplug(p_ddev);
                p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
@@ -363,7 +374,7 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev,
        if (!adev->xcp_mgr)
                return 0;
 
-       fpriv->xcp_id = ~0;
+       fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
        for (i = 0; i < MAX_XCP; ++i) {
                if (!adev->xcp_mgr->xcp[i].ddev)
                        break;
@@ -381,7 +392,7 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev,
                }
        }
 
-       fpriv->vm.mem_id = fpriv->xcp_id == ~0 ? -1 :
+       fpriv->vm.mem_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? -1 :
                                adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
        return 0;
 }
index 0f8026d..9a1036a 100644 (file)
@@ -37,6 +37,8 @@
 #define AMDGPU_XCP_FL_NONE 0
 #define AMDGPU_XCP_FL_LOCKED (1 << 0)
 
+#define AMDGPU_XCP_NO_PARTITION (~0)
+
 struct amdgpu_fpriv;
 
 enum AMDGPU_XCP_IP_BLOCK {
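/*
 * Note on the new constant: the xcp_id fields it replaces are 32-bit
 * (amdgpu_vm_init() takes an int32_t xcp_id elsewhere in this series), so
 * (~0) truncates to 0xffffffff and the AMDGPU_XCP_NO_PARTITION checks
 * below are a behavior-preserving rename of the literal ~0 comparisons:
 *
 *	BUILD_BUG_ON((u32)AMDGPU_XCP_NO_PARTITION != 0xffffffff);
 */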
index 16471b8..72b629a 100644 (file)
@@ -68,7 +68,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
        enum AMDGPU_XCP_IP_BLOCK ip_blk;
        uint32_t inst_mask;
 
-       ring->xcp_id = ~0;
+       ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
        if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
                return;
 
@@ -177,7 +177,7 @@ static int aqua_vanjaram_select_scheds(
        u32 sel_xcp_id;
        int i;
 
-       if (fpriv->xcp_id == ~0) {
+       if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
                u32 least_ref_cnt = ~0;
 
                fpriv->xcp_id = 0;
index 3a7af59..0451533 100644 (file)
@@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
        case IP_VERSION(11, 0, 3):
                if ((adev->gfx.me_fw_version >= 1505) &&
                    (adev->gfx.pfp_fw_version >= 1600) &&
-                   (adev->gfx.mec_fw_version >= 512))
-                       adev->gfx.cp_gfx_shadow = true;
+                   (adev->gfx.mec_fw_version >= 512)) {
+                       if (amdgpu_sriov_vf(adev))
+                               adev->gfx.cp_gfx_shadow = true;
+                       else
+                               adev->gfx.cp_gfx_shadow = false;
+               }
                break;
        default:
                adev->gfx.cp_gfx_shadow = false;
index 9e3b835..4f883b9 100644 (file)
@@ -46,6 +46,7 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 
 #define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
 
 struct amdgpu_gfx_ras gfx_v9_4_3_ras;
 
@@ -1736,7 +1737,7 @@ static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
 
        WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0);
        WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0);
-       WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, 0);
+       WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, CP_HQD_PERSISTENT_STATE_DEFAULT);
        WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
        WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
        WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
index 4dabf91..d9f14dc 100644 (file)
@@ -402,18 +402,15 @@ static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
 static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
                                       uint32_t xcc_mask)
 {
-       uint32_t tmp_mask;
        int i;
 
-       tmp_mask = xcc_mask;
        /*
         * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
         * VF copy registers, so the vbios post doesn't program them;
         * for SRIOV the driver needs to program them.
         */
        if (amdgpu_sriov_vf(adev)) {
-               for_each_inst(i, tmp_mask) {
-                       i = ffs(tmp_mask) - 1;
+               for_each_inst(i, xcc_mask) {
                        WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE,
                                     adev->gmc.vram_start >> 24);
                        WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP,
index f9cb0d2..af5685f 100644 (file)
@@ -49,6 +49,7 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
 
 /* For large FW files the time to complete can be very long */
 #define USBC_PD_POLLING_LIMIT_S 240
@@ -136,14 +137,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
        int ret;
        int retry_loop;
 
+       /* Wait for the bootloader to signal that it is ready by setting
+        * bit 31 of C2PMSG_35 to 1. All other bits are expected to be
+        * cleared. If there is an error in processing the command,
+        * bits [7:0] will be set. This applies to PSP v13.0.6 and newer.
+        */
        for (retry_loop = 0; retry_loop < 10; retry_loop++) {
-               /* Wait for bootloader to signify that is
-                   ready having bit 31 of C2PMSG_35 set to 1 */
-               ret = psp_wait_for(psp,
-                                  SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
-                                  0x80000000,
-                                  0x80000000,
-                                  false);
+               ret = psp_wait_for(
+                       psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+                       0x80000000, 0xffffffff, false);
 
                if (ret == 0)
                        return 0;
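/*
 * Assuming psp_wait_for() succeeds once (RREG32(reg) & mask) == reg_val,
 * widening the mask above tightens the wait condition:
 *
 *	old: (val & 0x80000000) == 0x80000000   -> only bit 31 checked
 *	new: (val & 0xffffffff) == 0x80000000   -> bit 31 set and all other
 *	     bits clear, so an error code latched in bits [7:0] now fails
 *	     the wait instead of being silently ignored
 */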
index 49f40d9..f5a6f56 100644 (file)
@@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
        if (ignore_crat)
                return true;
 
-#ifndef KFD_SUPPORT_IOMMU_V2
        ret = true;
-#else
-       ret = false;
-#endif
 
        return ret;
 }
index fff3ccc..9766076 100644 (file)
@@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
        if (!q)
                return 0;
 
-       if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
-           KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
+       if (!kfd_dbg_has_cwsr_workaround(q->device))
                return 0;
 
        if (enable && q->properties.is_user_cu_masked)
@@ -349,7 +348,7 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
 {
        uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
        uint32_t flags = pdd->process->dbg_flags;
-       bool sq_trap_en = !!spi_dbg_cntl;
+       bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
 
        if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
                return 0;
index a289e59..662a13a 100644 (file)
@@ -100,6 +100,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
                 KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
 }
 
+static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
+{
+       return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+              KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
+}
+
 static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
 {
        if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
index 0b3dc75..a53e075 100644 (file)
@@ -194,11 +194,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
 
                kfd_device_info_set_event_interrupt_class(kfd);
 
-               /* Raven */
-               if (gc_version == IP_VERSION(9, 1, 0) ||
-                   gc_version == IP_VERSION(9, 2, 2))
-                       kfd->device_info.needs_iommu_device = true;
-
                if (gc_version < IP_VERSION(11, 0, 0)) {
                        /* Navi2x+, Navi1x+ */
                        if (gc_version == IP_VERSION(10, 3, 6))
@@ -233,10 +228,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
                    asic_type != CHIP_TONGA)
                        kfd->device_info.supports_cwsr = true;
 
-               if (asic_type == CHIP_KAVERI ||
-                   asic_type == CHIP_CARRIZO)
-                       kfd->device_info.needs_iommu_device = true;
-
                if (asic_type != CHIP_HAWAII && !vf)
                        kfd->device_info.needs_pci_atomics = true;
        }
@@ -249,7 +240,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
        uint32_t gfx_target_version = 0;
 
        switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
 #ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_KAVERI:
                gfx_target_version = 70000;
@@ -262,7 +252,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                if (!vf)
                        f2g = &gfx_v8_kfd2kgd;
                break;
-#endif
 #ifdef CONFIG_DRM_AMDGPU_CIK
        case CHIP_HAWAII:
                gfx_target_version = 70001;
@@ -298,7 +287,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        gfx_target_version = 90000;
                        f2g = &gfx_v9_kfd2kgd;
                        break;
-#ifdef KFD_SUPPORT_IOMMU_V2
                /* Raven */
                case IP_VERSION(9, 1, 0):
                case IP_VERSION(9, 2, 2):
@@ -306,7 +294,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
                        if (!vf)
                                f2g = &gfx_v9_kfd2kgd;
                        break;
-#endif
                /* Vega12 */
                case IP_VERSION(9, 2, 1):
                        gfx_target_version = 90004;
index f515cb8..01192f5 100644 (file)
@@ -226,8 +226,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
        queue_input.paging = false;
        queue_input.tba_addr = qpd->tba_addr;
        queue_input.tma_addr = qpd->tma_addr;
-       queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
-                             KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3);
+       queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
        queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
 
        queue_type = convert_to_mes_queue_type(q->properties.type);
@@ -1806,8 +1805,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
         */
        q->properties.is_evicted = !!qpd->evicted;
        q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
-                       KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
-                       KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
+                                 kfd_dbg_has_cwsr_workaround(q->device);
 
        if (qd)
                mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
@@ -2540,18 +2538,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
        }
 
        switch (dev->adev->asic_type) {
-       case CHIP_CARRIZO:
-               device_queue_manager_init_vi(&dqm->asic_ops);
-               break;
-
        case CHIP_KAVERI:
-               device_queue_manager_init_cik(&dqm->asic_ops);
-               break;
-
        case CHIP_HAWAII:
                device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
                break;
 
+       case CHIP_CARRIZO:
        case CHIP_TONGA:
        case CHIP_FIJI:
        case CHIP_POLARIS10:
index 61fc62f..4a17bb7 100644 (file)
@@ -1965,7 +1965,14 @@ int kfd_topology_add_device(struct kfd_node *gpu)
        const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
 
        gpu_id = kfd_generate_gpu_id(gpu);
-       pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+       if (gpu->xcp && !gpu->xcp->ddev) {
+               dev_warn(gpu->adev->dev,
+               "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
+               gpu_id);
+               return 0;
+       } else {
+               pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+       }
 
        /* Check to see if this gpu device exists in the topology_device_list.
         * If so, assign the gpu to that device,
index ff0a217..e5554a3 100644 (file)
@@ -424,12 +424,12 @@ static void dm_pflip_high_irq(void *interrupt_params)
 
        spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
 
-       if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
-               DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
-                                                amdgpu_crtc->pflip_status,
-                                                AMDGPU_FLIP_SUBMITTED,
-                                                amdgpu_crtc->crtc_id,
-                                                amdgpu_crtc);
+       if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+               DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
+                            amdgpu_crtc->pflip_status,
+                            AMDGPU_FLIP_SUBMITTED,
+                            amdgpu_crtc->crtc_id,
+                            amdgpu_crtc);
                spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
                return;
        }
@@ -883,7 +883,7 @@ static int dm_set_powergating_state(void *handle,
 }
 
 /* Prototypes of private functions */
-static int dm_early_init(voidhandle);
+static int dm_early_init(void *handle);
 
 /* Allocate memory for FBC compressed data */
 static void amdgpu_dm_fbc_init(struct drm_connector *connector)
@@ -1282,7 +1282,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
        pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
        pa_config->system_aperture.end_addr = (uint64_t)logical_addr_high << 18;
 
-       pa_config->system_aperture.agp_base = (uint64_t)agp_base << 24 ;
+       pa_config->system_aperture.agp_base = (uint64_t)agp_base << 24;
        pa_config->system_aperture.agp_bot = (uint64_t)agp_bot << 24;
        pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
 
@@ -1347,6 +1347,15 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
        if (amdgpu_in_reset(adev))
                goto skip;
 
+       if (offload_work->data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
+               offload_work->data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
+               dm_handle_mst_sideband_msg_ready_event(&aconnector->mst_mgr, DOWN_OR_UP_MSG_RDY_EVENT);
+               spin_lock_irqsave(&offload_work->offload_wq->offload_lock, flags);
+               offload_work->offload_wq->is_handling_mst_msg_rdy_event = false;
+               spin_unlock_irqrestore(&offload_work->offload_wq->offload_lock, flags);
+               goto skip;
+       }
+
        mutex_lock(&adev->dm.dc_lock);
        if (offload_work->data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
                dc_link_dp_handle_automated_test(dc_link);
@@ -1365,8 +1374,7 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
                DP_TEST_RESPONSE,
                &test_response.raw,
                sizeof(test_response));
-       }
-       else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
+       } else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
                        dc_link_check_link_loss_status(dc_link, &offload_work->data) &&
                        dc_link_dp_allow_hpd_rx_irq(dc_link)) {
                /* offload_work->data is from handle_hpd_rx_irq->
@@ -1554,7 +1562,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
        mutex_init(&adev->dm.dc_lock);
        mutex_init(&adev->dm.audio_lock);
 
-       if(amdgpu_dm_irq_init(adev)) {
+       if (amdgpu_dm_irq_init(adev)) {
                DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
                goto error;
        }
@@ -1630,9 +1638,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
                }
                break;
        }
-       if (init_data.flags.gpu_vm_support &&
-           (amdgpu_sg_display == 0))
-               init_data.flags.gpu_vm_support = false;
+       if (init_data.flags.gpu_vm_support)
+               init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
 
        if (init_data.flags.gpu_vm_support)
                adev->mode_info.gpu_vm_support = true;
@@ -1696,9 +1703,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
        if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER)
                adev->dm.dc->debug.disable_stutter = true;
 
-       if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) {
+       if (amdgpu_dc_debug_mask & DC_DISABLE_DSC)
                adev->dm.dc->debug.disable_dsc = true;
-       }
 
        if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
                adev->dm.dc->debug.disable_clock_gate = true;
@@ -1942,8 +1948,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
        mutex_destroy(&adev->dm.audio_lock);
        mutex_destroy(&adev->dm.dc_lock);
        mutex_destroy(&adev->dm.dpia_aux_lock);
-
-       return;
 }
 
 static int load_dmcu_fw(struct amdgpu_device *adev)
@@ -1952,7 +1956,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
        int r;
        const struct dmcu_firmware_header_v1_0 *hdr;
 
-       switch(adev->asic_type) {
+       switch (adev->asic_type) {
 #if defined(CONFIG_DRM_AMD_DC_SI)
        case CHIP_TAHITI:
        case CHIP_PITCAIRN:
@@ -2709,7 +2713,7 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
                struct dc_scaling_info scaling_infos[MAX_SURFACES];
                struct dc_flip_addrs flip_addrs[MAX_SURFACES];
                struct dc_stream_update stream_update;
-       } * bundle;
+       } *bundle;
        int k, m;
 
        bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
@@ -2739,8 +2743,6 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
 
 cleanup:
        kfree(bundle);
-
-       return;
 }
 
 static int dm_resume(void *handle)
@@ -2954,8 +2956,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
        .set_powergating_state = dm_set_powergating_state,
 };
 
-const struct amdgpu_ip_block_version dm_ip_block =
-{
+const struct amdgpu_ip_block_version dm_ip_block = {
        .type = AMD_IP_BLOCK_TYPE_DCE,
        .major = 1,
        .minor = 0,
@@ -3000,9 +3001,12 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
        caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
        caps->aux_support = false;
 
-       if (caps->ext_caps->bits.oled == 1 /*||
-           caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
-           caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
+       if (caps->ext_caps->bits.oled == 1
+           /*
+            * ||
+            * caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
+            * caps->ext_caps->bits.hdr_aux_backlight_control == 1
+            */)
                caps->aux_support = true;
 
        if (amdgpu_backlight == 0)
@@ -3236,86 +3240,6 @@ static void handle_hpd_irq(void *param)
 
 }
 
-static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
-{
-       u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
-       u8 dret;
-       bool new_irq_handled = false;
-       int dpcd_addr;
-       int dpcd_bytes_to_read;
-
-       const int max_process_count = 30;
-       int process_count = 0;
-
-       const struct dc_link_status *link_status = dc_link_get_status(aconnector->dc_link);
-
-       if (link_status->dpcd_caps->dpcd_rev.raw < 0x12) {
-               dpcd_bytes_to_read = DP_LANE0_1_STATUS - DP_SINK_COUNT;
-               /* DPCD 0x200 - 0x201 for downstream IRQ */
-               dpcd_addr = DP_SINK_COUNT;
-       } else {
-               dpcd_bytes_to_read = DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI;
-               /* DPCD 0x2002 - 0x2005 for downstream IRQ */
-               dpcd_addr = DP_SINK_COUNT_ESI;
-       }
-
-       dret = drm_dp_dpcd_read(
-               &aconnector->dm_dp_aux.aux,
-               dpcd_addr,
-               esi,
-               dpcd_bytes_to_read);
-
-       while (dret == dpcd_bytes_to_read &&
-               process_count < max_process_count) {
-               u8 ack[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = {};
-               u8 retry;
-               dret = 0;
-
-               process_count++;
-
-               DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
-               /* handle HPD short pulse irq */
-               if (aconnector->mst_mgr.mst_state)
-                       drm_dp_mst_hpd_irq_handle_event(&aconnector->mst_mgr,
-                                                       esi,
-                                                       ack,
-                                                       &new_irq_handled);
-
-               if (new_irq_handled) {
-                       /* ACK at DPCD to notify down stream */
-                       for (retry = 0; retry < 3; retry++) {
-                               ssize_t wret;
-
-                               wret = drm_dp_dpcd_writeb(&aconnector->dm_dp_aux.aux,
-                                                         dpcd_addr + 1,
-                                                         ack[1]);
-                               if (wret == 1)
-                                       break;
-                       }
-
-                       if (retry == 3) {
-                               DRM_ERROR("Failed to ack MST event.\n");
-                               return;
-                       }
-
-                       drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr);
-                       /* check if there is new irq to be handled */
-                       dret = drm_dp_dpcd_read(
-                               &aconnector->dm_dp_aux.aux,
-                               dpcd_addr,
-                               esi,
-                               dpcd_bytes_to_read);
-
-                       new_irq_handled = false;
-               } else {
-                       break;
-               }
-       }
-
-       if (process_count == max_process_count)
-               DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
-}
-
 static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq,
                                                        union hpd_irq_data hpd_irq_data)
 {
@@ -3377,7 +3301,23 @@ static void handle_hpd_rx_irq(void *param)
        if (dc_link_dp_allow_hpd_rx_irq(dc_link)) {
                if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
                        hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
-                       dm_handle_mst_sideband_msg(aconnector);
+                       bool skip = false;
+
+                       /*
+                        * DOWN_REP_MSG_RDY is also handled by the polling
+                        * method mgr->cbs->poll_hpd_irq().
+                        */
+                       spin_lock(&offload_wq->offload_lock);
+                       skip = offload_wq->is_handling_mst_msg_rdy_event;
+
+                       if (!skip)
+                               offload_wq->is_handling_mst_msg_rdy_event = true;
+
+                       spin_unlock(&offload_wq->offload_lock);
+
+                       if (!skip)
+                               schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+
                        goto out;
                }
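/*
 * The skip/claim sequence above is the usual "claim a busy flag under a
 * spinlock" idiom, so only one worker handles the MST message-ready event
 * at a time; a condensed sketch with hypothetical names:
 */
static bool example_try_claim(spinlock_t *lock, bool *busy)
{
	bool claimed = false;

	spin_lock(lock);
	if (!*busy)
		*busy = claimed = true;	/* first caller wins */
	spin_unlock(lock);

	return claimed;
}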
 
@@ -3468,7 +3408,7 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
                aconnector = to_amdgpu_dm_connector(connector);
                dc_link = aconnector->dc_link;
 
-               if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd) {
+               if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
                        int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
                        int_params.irq_source = dc_link->irq_source_hpd;
 
@@ -3477,7 +3417,7 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
                                        (void *) aconnector);
                }
 
-               if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd_rx) {
+               if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
 
                        /* Also register for DP short pulse (hpd_rx). */
                        int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
@@ -3486,11 +3426,11 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
                        amdgpu_dm_irq_register_interrupt(adev, &int_params,
                                        handle_hpd_rx_irq,
                                        (void *) aconnector);
-
-                       if (adev->dm.hpd_rx_offload_wq)
-                               adev->dm.hpd_rx_offload_wq[dc_link->link_index].aconnector =
-                                       aconnector;
                }
+
+               if (adev->dm.hpd_rx_offload_wq)
+                       adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
+                               aconnector;
        }
 }
 
@@ -3503,7 +3443,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
        struct dc_interrupt_params int_params = {0};
        int r;
        int i;
-       unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+       unsigned int client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
 
        int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
        int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
@@ -3517,11 +3457,12 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
         *    Base driver will call amdgpu_dm_irq_handler() for ALL interrupts
         *    coming from DC hardware.
         *    amdgpu_dm_irq_handler() will re-direct the interrupt to DC
-        *    for acknowledging and handling. */
+        *    for acknowledging and handling.
+        */
 
        /* Use VBLANK interrupt */
        for (i = 0; i < adev->mode_info.num_crtc; i++) {
-               r = amdgpu_irq_add_id(adev, client_id, i+1 , &adev->crtc_irq);
+               r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq);
                if (r) {
                        DRM_ERROR("Failed to add crtc irq id!\n");
                        return r;
@@ -3529,7 +3470,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
 
                int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
                int_params.irq_source =
-                       dc_interrupt_to_irq_source(dc, i+1 , 0);
+                       dc_interrupt_to_irq_source(dc, i + 1, 0);
 
                c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
 
@@ -3585,7 +3526,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
        struct dc_interrupt_params int_params = {0};
        int r;
        int i;
-       unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+       unsigned int client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
 
        if (adev->family >= AMDGPU_FAMILY_AI)
                client_id = SOC15_IH_CLIENTID_DCE;
@@ -3602,7 +3543,8 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
         *    Base driver will call amdgpu_dm_irq_handler() for ALL interrupts
         *    coming from DC hardware.
         *    amdgpu_dm_irq_handler() will re-direct the interrupt to DC
-        *    for acknowledging and handling. */
+        *    for acknowledging and handling.
+        */
 
        /* Use VBLANK interrupt */
        for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) {
@@ -4049,7 +3991,7 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
 }
 
 static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
-                               unsigned *min, unsigned *max)
+                               unsigned int *min, unsigned int *max)
 {
        if (!caps)
                return 0;
@@ -4069,7 +4011,7 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
 static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps,
                                        uint32_t brightness)
 {
-       unsigned min, max;
+       unsigned int min, max;
 
        if (!get_brightness_range(caps, &min, &max))
                return brightness;
@@ -4082,7 +4024,7 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c
 static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps,
                                      uint32_t brightness)
 {
-       unsigned min, max;
+       unsigned int min, max;
 
        if (!get_brightness_range(caps, &min, &max))
                return brightness;
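
The two conversion helpers above translate between the 0-255 user-facing
brightness scale and the [min, max] range reported by the backlight caps;
the actual interpolation lies outside this hunk. A minimal sketch of a
plausible linear mapping, assuming a 255-step user scale (constants and
formula are illustrative, not the driver's verbatim code):

    #include <stdint.h>
    #include <stdio.h>

    #define USER_MAX 255u   /* assumed user-facing scale */

    /* Hypothetical linear mapping from the user scale into [min, max]. */
    static uint32_t from_user(uint32_t min, uint32_t max, uint32_t v)
    {
            return min + v * (max - min) / USER_MAX;
    }

    /* The inverse, back onto the user scale. */
    static uint32_t to_user(uint32_t min, uint32_t max, uint32_t v)
    {
            if (max <= min)                 /* degenerate caps: pass through */
                    return v;
            return (v - min) * USER_MAX / (max - min);
    }

    int main(void)
    {
            uint32_t min = 12, max = 450;   /* example caps values */
            uint32_t hw = from_user(min, max, 128);

            printf("user 128 -> hw %u -> user %u\n", hw,
                   to_user(min, max, hw));
            return 0;
    }
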
@@ -4562,7 +4504,6 @@ fail:
 static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm)
 {
        drm_atomic_private_obj_fini(&dm->atomic_obj);
-       return;
 }
 
 /******************************************************************************
@@ -5394,6 +5335,7 @@ static bool adjust_colour_depth_from_display_info(
 {
        enum dc_color_depth depth = timing_out->display_color_depth;
        int normalized_clk;
+
        do {
                normalized_clk = timing_out->pix_clk_100hz / 10;
                /* YCbCr 4:2:0 requires additional adjustment of 1/2 */
@@ -5609,6 +5551,7 @@ create_fake_sink(struct amdgpu_dm_connector *aconnector)
 {
        struct dc_sink_init_data sink_init_data = { 0 };
        struct dc_sink *sink = NULL;
+
        sink_init_data.link = aconnector->dc_link;
        sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
 
@@ -5732,7 +5675,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
                return &aconnector->freesync_vid_base;
 
        /* Find the preferred mode */
-       list_for_each_entry (m, list_head, head) {
+       list_for_each_entry(m, list_head, head) {
                if (m->type & DRM_MODE_TYPE_PREFERRED) {
                        m_pref = m;
                        break;
@@ -5756,7 +5699,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
         * For some monitors, preferred mode is not the mode with highest
         * supported refresh rate.
         */
-       list_for_each_entry (m, list_head, head) {
+       list_for_each_entry(m, list_head, head) {
                current_refresh  = drm_mode_vrefresh(m);
 
                if (m->hdisplay == m_pref->hdisplay &&
@@ -6028,7 +5971,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                 * This may not be an error, the use case is when we have no
                 * usermode calls to reset and set mode upon hotplug. In this
                 * case, we call set mode ourselves to restore the previous mode
-                * and the modelist may not be filled in in time.
+                * and the modelist may not be filled in time.
                 */
                DRM_DEBUG_DRIVER("No preferred mode found\n");
        } else {
@@ -6051,9 +5994,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
                drm_mode_set_crtcinfo(&mode, 0);
 
        /*
-       * If scaling is enabled and refresh rate didn't change
-       * we copy the vic and polarities of the old timings
-       */
+        * If scaling is enabled and refresh rate didn't change
+        * we copy the vic and polarities of the old timings
+        */
        if (!scale || mode_refresh != preferred_refresh)
                fill_stream_properties_from_drm_display_mode(
                        stream, &mode, &aconnector->base, con_state, NULL,
@@ -6817,6 +6760,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
 
        if (!state->duplicated) {
                int max_bpc = conn_state->max_requested_bpc;
+
                is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) &&
                          aconnector->force_yuv420_output;
                color_depth = convert_color_depth_from_display_info(connector,
@@ -7135,7 +7079,7 @@ static bool is_duplicate_mode(struct amdgpu_dm_connector *aconnector,
 {
        struct drm_display_mode *m;
 
-       list_for_each_entry (m, &aconnector->base.probed_modes, head) {
+       list_for_each_entry(m, &aconnector->base.probed_modes, head) {
                if (drm_mode_equal(m, mode))
                        return true;
        }
@@ -7295,6 +7239,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
        aconnector->as_type = ADAPTIVE_SYNC_TYPE_NONE;
        memset(&aconnector->vsdb_info, 0, sizeof(aconnector->vsdb_info));
        mutex_init(&aconnector->hpd_lock);
+       mutex_init(&aconnector->handle_mst_msg_ready);
 
        /*
         * configure support HPD hot plug connector->polled default value is 0
@@ -7454,7 +7399,6 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
 
        link->priv = aconnector;
 
-       DRM_DEBUG_DRIVER("%s()\n", __func__);
 
        i2c = create_i2c(link->ddc, link->link_index, &res);
        if (!i2c) {
@@ -8125,7 +8069,15 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                 * Only allow immediate flips for fast updates that don't
                 * change memory domain, FB pitch, DCC state, rotation or
                 * mirroring.
+                *
+                * dm_crtc_helper_atomic_check() only accepts async flips with
+                * fast updates.
                 */
+               if (crtc->state->async_flip &&
+                   acrtc_state->update_type != UPDATE_TYPE_FAST)
+                       drm_warn_once(state->dev,
+                                     "[PLANE:%d:%s] async flip with non-fast update\n",
+                                     plane->base.id, plane->name);
                bundle->flip_addrs[planes_count].flip_immediate =
                        crtc->state->async_flip &&
                        acrtc_state->update_type == UPDATE_TYPE_FAST &&
@@ -8168,8 +8120,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                         * DRI3/Present extension with defined target_msc.
                         */
                        last_flip_vblank = amdgpu_get_vblank_counter_kms(pcrtc);
-               }
-               else {
+               } else {
                        /* For variable refresh rate mode only:
                         * Get vblank of last completed flip to avoid > 1 vrr
                         * flips per video frame by use of throttling, but allow
@@ -8502,8 +8453,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
                dc_resource_state_copy_construct_current(dm->dc, dc_state);
        }
 
-       for_each_oldnew_crtc_in_state (state, crtc, old_crtc_state,
-                                      new_crtc_state, i) {
+       for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+                                     new_crtc_state, i) {
                struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
                dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
@@ -8526,9 +8477,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
                dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
 
                drm_dbg_state(state->dev,
-                       "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, "
-                       "planes_changed:%d, mode_changed:%d,active_changed:%d,"
-                       "connectors_changed:%d\n",
+                       "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, planes_changed:%d, mode_changed:%d,active_changed:%d,connectors_changed:%d\n",
                        acrtc->crtc_id,
                        new_crtc_state->enable,
                        new_crtc_state->active,
@@ -9104,8 +9053,8 @@ static int do_aquire_global_lock(struct drm_device *dev,
                                        &commit->flip_done, 10*HZ);
 
                if (ret == 0)
-                       DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done "
-                                 "timed out\n", crtc->base.id, crtc->name);
+                       DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done timed out\n",
+                                 crtc->base.id, crtc->name);
 
                drm_crtc_commit_put(commit);
        }
@@ -9190,7 +9139,8 @@ is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
        return false;
 }
 
-static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state) {
+static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state)
+{
        u64 num, den, res;
        struct drm_crtc_state *new_crtc_state = &dm_new_crtc_state->base;
 
@@ -9312,9 +9262,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
                goto skip_modeset;
 
        drm_dbg_state(state->dev,
-               "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, "
-               "planes_changed:%d, mode_changed:%d,active_changed:%d,"
-               "connectors_changed:%d\n",
+               "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, planes_changed:%d, mode_changed:%d,active_changed:%d,connectors_changed:%d\n",
                acrtc->crtc_id,
                new_crtc_state->enable,
                new_crtc_state->active,
@@ -9343,8 +9291,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
                                                     old_crtc_state)) {
                        new_crtc_state->mode_changed = false;
                        DRM_DEBUG_DRIVER(
-                               "Mode change not required for front porch change, "
-                               "setting mode_changed to %d",
+                               "Mode change not required for front porch change, setting mode_changed to %d",
                                new_crtc_state->mode_changed);
 
                        set_freesync_fixed_config(dm_new_crtc_state);
@@ -9356,9 +9303,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
                        struct drm_display_mode *high_mode;
 
                        high_mode = get_highest_refresh_rate_mode(aconnector, false);
-                       if (!drm_mode_equal(&new_crtc_state->mode, high_mode)) {
+                       if (!drm_mode_equal(&new_crtc_state->mode, high_mode))
                                set_freesync_fixed_config(dm_new_crtc_state);
-                       }
                }
 
                ret = dm_atomic_get_state(state, &dm_state);
@@ -9526,6 +9472,7 @@ static bool should_reset_plane(struct drm_atomic_state *state,
         */
        for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
                struct amdgpu_framebuffer *old_afb, *new_afb;
+
                if (other->type == DRM_PLANE_TYPE_CURSOR)
                        continue;
 
@@ -9624,11 +9571,12 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
        }
 
        /* Core DRM takes care of checking FB modifiers, so we only need to
-        * check tiling flags when the FB doesn't have a modifier. */
+        * check tiling flags when the FB doesn't have a modifier.
+        */
        if (!(fb->flags & DRM_MODE_FB_MODIFIERS)) {
                if (adev->family < AMDGPU_FAMILY_AI) {
                        linear = AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_2D_TILED_THIN1 &&
-                                AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
+                                AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
                                 AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE) == 0;
                } else {
                        linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
@@ -9850,12 +9798,12 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state,
        /* On DCE and DCN there is no dedicated hardware cursor plane. We get a
         * cursor per pipe but it's going to inherit the scaling and
         * positioning from the underlying pipe. Check the cursor plane's
-        * blending properties match the underlying planes'. */
+        * blending properties match the underlying planes'.
+        */
 
        new_cursor_state = drm_atomic_get_new_plane_state(state, cursor);
-       if (!new_cursor_state || !new_cursor_state->fb) {
+       if (!new_cursor_state || !new_cursor_state->fb)
                return 0;
-       }
 
        dm_get_oriented_plane_size(new_cursor_state, &cursor_src_w, &cursor_src_h);
        cursor_scale_w = new_cursor_state->crtc_w * 1000 / cursor_src_w;
@@ -9900,6 +9848,7 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
        struct drm_connector_state *conn_state, *old_conn_state;
        struct amdgpu_dm_connector *aconnector = NULL;
        int i;
+
        for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) {
                if (!conn_state->crtc)
                        conn_state = old_conn_state;
@@ -10334,7 +10283,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
        }
 
        /* Store the overall update type for use later in atomic check. */
-       for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
+       for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
                struct dm_crtc_state *dm_new_crtc_state =
                        to_dm_crtc_state(new_crtc_state);
 
@@ -10356,7 +10305,7 @@ fail:
        else if (ret == -EINTR || ret == -EAGAIN || ret == -ERESTARTSYS)
                DRM_DEBUG_DRIVER("Atomic check stopped due to signal.\n");
        else
-               DRM_DEBUG_DRIVER("Atomic check failed with err: %d \n", ret);
+               DRM_DEBUG_DRIVER("Atomic check failed with err: %d\n", ret);
 
        trace_amdgpu_dm_atomic_check_finish(state, ret);
 
index 4561f55..9fb5bb3 100644 (file)
@@ -195,6 +195,11 @@ struct hpd_rx_irq_offload_work_queue {
         */
        bool is_handling_link_loss;
        /**
+        * @is_handling_mst_msg_rdy_event: Used to avoid queueing another MST
+        * message ready event while one is already being handled
+        */
+       bool is_handling_mst_msg_rdy_event;
+       /**
         * @aconnector: The aconnector that this work queue is attached to
         */
        struct amdgpu_dm_connector *aconnector;
@@ -638,6 +643,8 @@ struct amdgpu_dm_connector {
        struct drm_dp_mst_port *mst_output_port;
        struct amdgpu_dm_connector *mst_root;
        struct drm_dp_aux *dsc_aux;
+       struct mutex handle_mst_msg_ready;
+
        /* TODO see if we can merge with ddc_bus or make a dm_connector */
        struct amdgpu_i2c_adapter *i2c;
 
index 440fc08..30d4c6f 100644 (file)
@@ -398,6 +398,18 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
                return -EINVAL;
        }
 
+       /*
+        * Only allow async flips for fast updates that don't change the FB
+        * pitch, the DCC state, rotation, etc.
+        */
+       if (crtc_state->async_flip &&
+           dm_crtc_state->update_type != UPDATE_TYPE_FAST) {
+               drm_dbg_atomic(crtc->dev,
+                              "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+                              crtc->base.id, crtc->name);
+               return -EINVAL;
+       }
+
        /* In some use cases, like reset, no stream is attached */
        if (!dm_crtc_state->stream)
                return 0;
index 46d0a8f..b885c39 100644 (file)
@@ -619,8 +619,118 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
        return connector;
 }
 
+void dm_handle_mst_sideband_msg_ready_event(
+       struct drm_dp_mst_topology_mgr *mgr,
+       enum mst_msg_ready_type msg_rdy_type)
+{
+       uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
+       uint8_t dret;
+       bool new_irq_handled = false;
+       int dpcd_addr;
+       uint8_t dpcd_bytes_to_read;
+       const uint8_t max_process_count = 30;
+       uint8_t process_count = 0;
+       u8 retry;
+       struct amdgpu_dm_connector *aconnector =
+                       container_of(mgr, struct amdgpu_dm_connector, mst_mgr);
+
+       const struct dc_link_status *link_status = dc_link_get_status(aconnector->dc_link);
+
+       if (link_status->dpcd_caps->dpcd_rev.raw < 0x12) {
+               dpcd_bytes_to_read = DP_LANE0_1_STATUS - DP_SINK_COUNT;
+               /* DPCD 0x200 - 0x201 for downstream IRQ */
+               dpcd_addr = DP_SINK_COUNT;
+       } else {
+               dpcd_bytes_to_read = DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI;
+               /* DPCD 0x2002 - 0x2005 for downstream IRQ */
+               dpcd_addr = DP_SINK_COUNT_ESI;
+       }
+
+       mutex_lock(&aconnector->handle_mst_msg_ready);
+
+       while (process_count < max_process_count) {
+               u8 ack[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = {};
+
+               process_count++;
+
+               dret = drm_dp_dpcd_read(
+                       &aconnector->dm_dp_aux.aux,
+                       dpcd_addr,
+                       esi,
+                       dpcd_bytes_to_read);
+
+               if (dret != dpcd_bytes_to_read) {
+                       DRM_DEBUG_KMS("DPCD read and acked number is not as expected!");
+                       break;
+               }
+
+               DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
+
+               switch (msg_rdy_type) {
+               case DOWN_REP_MSG_RDY_EVENT:
+                       /* Only handle DOWN_REP_MSG_RDY case */
+                       esi[1] &= DP_DOWN_REP_MSG_RDY;
+                       break;
+               case UP_REQ_MSG_RDY_EVENT:
+                       /* Only handle UP_REQ_MSG_RDY case */
+                       esi[1] &= DP_UP_REQ_MSG_RDY;
+                       break;
+               default:
+                       /* Handle both cases */
+                       esi[1] &= (DP_DOWN_REP_MSG_RDY | DP_UP_REQ_MSG_RDY);
+                       break;
+               }
+
+               if (!esi[1])
+                       break;
+
+               /* handle MST irq */
+               if (aconnector->mst_mgr.mst_state)
+                       drm_dp_mst_hpd_irq_handle_event(&aconnector->mst_mgr,
+                                                esi,
+                                                ack,
+                                                &new_irq_handled);
+
+               if (new_irq_handled) {
+                       /* ACK at DPCD to notify downstream */
+                       for (retry = 0; retry < 3; retry++) {
+                               ssize_t wret;
+
+                               wret = drm_dp_dpcd_writeb(&aconnector->dm_dp_aux.aux,
+                                                         dpcd_addr + 1,
+                                                         ack[1]);
+                               if (wret == 1)
+                                       break;
+                       }
+
+                       if (retry == 3) {
+                               DRM_ERROR("Failed to ack MST event.\n");
+                               break;
+                       }
+
+                       drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr);
+
+                       new_irq_handled = false;
+               } else {
+                       break;
+               }
+       }
+
+       mutex_unlock(&aconnector->handle_mst_msg_ready);
+
+       if (process_count == max_process_count)
+               DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
+}
+
+static void dm_handle_mst_down_rep_msg_ready(struct drm_dp_mst_topology_mgr *mgr)
+{
+       dm_handle_mst_sideband_msg_ready_event(mgr, DOWN_REP_MSG_RDY_EVENT);
+}
+
 static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
        .add_connector = dm_dp_add_mst_connector,
+       .poll_hpd_irq = dm_handle_mst_down_rep_msg_ready,
 };
 
 void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
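
The ACK path above uses a standard bounded-retry idiom: attempt the one-byte
DPCD write up to three times, then detect exhaustion by checking whether the
loop counter reached the bound (retry == 3 holds only when every attempt
failed). A self-contained sketch of the idiom, with a stub standing in for
drm_dp_dpcd_writeb() so it runs anywhere:

    #include <stdio.h>

    #define MAX_RETRIES 3

    /* Stub write: fails twice, then reports one byte written. */
    static long stub_writeb(int attempt)
    {
            return attempt == 2 ? 1 : -1;
    }

    int main(void)
    {
            int retry;

            for (retry = 0; retry < MAX_RETRIES; retry++) {
                    if (stub_writeb(retry) == 1)
                            break;          /* one byte written, done */
            }

            if (retry == MAX_RETRIES)       /* fell through: all tries failed */
                    printf("ack failed\n");
            else
                    printf("acked on attempt %d\n", retry);
            return 0;
    }
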
@@ -1210,7 +1320,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
                if (computed_streams[i])
                        continue;
 
-               if (!res_pool->funcs->remove_stream_from_ctx ||
+               if (res_pool->funcs->remove_stream_from_ctx &&
                    res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
                        return -EINVAL;
 
index 1e4ede1..37c820a 100644 (file)
 #define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B     1031
 #define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B  1000
 
+enum mst_msg_ready_type {
+       NONE_MSG_RDY_EVENT = 0,
+       DOWN_REP_MSG_RDY_EVENT = 1,
+       UP_REQ_MSG_RDY_EVENT = 2,
+       DOWN_OR_UP_MSG_RDY_EVENT = 3
+};
+
 struct amdgpu_display_manager;
 struct amdgpu_dm_connector;
 
@@ -61,6 +68,10 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
 void
 dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev);
 
+void dm_handle_mst_sideband_msg_ready_event(
+       struct drm_dp_mst_topology_mgr *mgr,
+       enum mst_msg_ready_type msg_rdy_type);
+
 struct dsc_mst_fairness_vars {
        int pbn;
        bool dsc_enabled;
index 7ccd969..3db4ef5 100644 (file)
@@ -87,6 +87,11 @@ static int dcn31_get_active_display_cnt_wa(
                                stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
                                stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
                        tmds_present = true;
+
+               /* Check stream/link detection to ensure that the PHY is active */
+               if (dc_is_dp_signal(stream->signal) && !stream->dpms_off)
+                       display_count++;
+
        }
 
        for (i = 0; i < dc->link_count; i++) {
index 6c9ca43..6966420 100644 (file)
@@ -777,7 +777,8 @@ void dce110_edp_wait_for_hpd_ready(
        dal_gpio_destroy_irq(&hpd);
 
        /* ensure that the panel is detected */
-       ASSERT(edp_hpd_high);
+       if (!edp_hpd_high)
+               DC_LOG_DC("%s: wait timed out!\n", __func__);
 }
 
 void dce110_edp_power_control(
@@ -1792,10 +1793,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
                        hws->funcs.edp_backlight_control(edp_link_with_sink, false);
                }
                /*resume from S3, no vbios posting, no need to power down again*/
+               clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
+
                power_down_all_hw_blocks(dc);
                disable_vga_and_power_gate_all_controllers(dc);
                if (edp_link_with_sink && !keep_edp_vdd_on)
                        dc->hwss.edp_power_control(edp_link_with_sink, false);
+               clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
        }
        bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1);
 }
index a503090..9834b75 100644 (file)
@@ -3278,7 +3278,8 @@ void dcn10_wait_for_mpcc_disconnect(
                if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) {
                        struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst);
 
-                       if (pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg))
+                       if (pipe_ctx->stream_res.tg &&
+                               pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg))
                                res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
                        pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
                        hubp->funcs->set_blank(hubp, true);
index 4cc8de2..9f2e243 100644 (file)
@@ -712,7 +712,7 @@ static const struct dc_debug_options debug_defaults_drv = {
                .timing_trace = false,
                .clock_trace = true,
                .disable_pplib_clock_request = true,
-               .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+               .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
                .force_single_disp_pipe_split = false,
                .disable_dcc = DCC_ENABLE,
                .vsr_support = true,
index e5b7ef7..50dc834 100644 (file)
@@ -357,8 +357,11 @@ void dpp3_set_cursor_attributes(
        int cur_rom_en = 0;
 
        if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
-               color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA)
-               cur_rom_en = 1;
+               color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
+               if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
+                       cur_rom_en = 1;
+               }
+       }
 
        REG_UPDATE_3(CURSOR0_CONTROL,
                        CUR0_MODE, color_format,
index dfb8f62..5bf4d0a 100644 (file)
@@ -215,7 +215,7 @@ void optc3_set_odm_bypass(struct timing_generator *optc,
        optc1->opp_count = 1;
 }
 
-static void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
+void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
                struct dc_crtc_timing *timing)
 {
        struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -293,7 +293,7 @@ static void optc3_set_timing_double_buffer(struct timing_generator *optc, bool e
                   OTG_DRR_TIMING_DBUF_UPDATE_MODE, mode);
 }
 
-static void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)
+void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)
 {
        struct optc *optc1 = DCN10TG_FROM_TG(optc);
 
index fb06dc9..d3a056c 100644 (file)
@@ -351,6 +351,9 @@ void optc3_set_timing_db_mode(struct timing_generator *optc, bool enable);
 
 void optc3_set_odm_bypass(struct timing_generator *optc,
                const struct dc_crtc_timing *dc_crtc_timing);
+void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
+               struct dc_crtc_timing *timing);
+void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc);
 void optc3_tg_init(struct timing_generator *optc);
 void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
 #endif /* __DC_OPTC_DCN30_H__ */
index 7aa628c..9002cb1 100644 (file)
@@ -11,7 +11,8 @@
 # Makefile for dcn30.
 
 DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
-               dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o
+               dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o \
+               dcn301_optc.o
 
 AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
new file mode 100644 (file)
index 0000000..b3cfcb8
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "dcn301_optc.h"
+#include "dc.h"
+#include "dcn_calc_math.h"
+#include "dc_dmub_srv.h"
+
+#include "dml/dcn30/dcn30_fpu.h"
+#include "dc_trace.h"
+
+#define REG(reg)\
+       optc1->tg_regs->reg
+
+#define CTX \
+       optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+       optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+
+/**
+ * optc301_set_drr() - Program dynamic refresh rate registers m_OTGx_OTG_V_TOTAL_*.
+ *
+ * @optc: timing_generator instance.
+ * @params: parameters used for Dynamic Refresh Rate.
+ */
+void optc301_set_drr(
+       struct timing_generator *optc,
+       const struct drr_params *params)
+{
+       struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+       if (params != NULL &&
+               params->vertical_total_max > 0 &&
+               params->vertical_total_min > 0) {
+
+               if (params->vertical_total_mid != 0) {
+
+                       REG_SET(OTG_V_TOTAL_MID, 0,
+                               OTG_V_TOTAL_MID, params->vertical_total_mid - 1);
+
+                       REG_UPDATE_2(OTG_V_TOTAL_CONTROL,
+                                       OTG_VTOTAL_MID_REPLACING_MAX_EN, 1,
+                                       OTG_VTOTAL_MID_FRAME_NUM,
+                                       (uint8_t)params->vertical_total_mid_frame_num);
+
+               }
+
+               optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1);
+
+               REG_UPDATE_5(OTG_V_TOTAL_CONTROL,
+                               OTG_V_TOTAL_MIN_SEL, 1,
+                               OTG_V_TOTAL_MAX_SEL, 1,
+                               OTG_FORCE_LOCK_ON_EVENT, 0,
+                               OTG_SET_V_TOTAL_MIN_MASK_EN, 0,
+                               OTG_SET_V_TOTAL_MIN_MASK, 0);
+               // Setup manual flow control for EOF via TRIG_A
+               optc->funcs->setup_manual_trigger(optc);
+
+       } else {
+               REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+                               OTG_SET_V_TOTAL_MIN_MASK, 0,
+                               OTG_V_TOTAL_MIN_SEL, 0,
+                               OTG_V_TOTAL_MAX_SEL, 0,
+                               OTG_FORCE_LOCK_ON_EVENT, 0);
+
+               optc->funcs->set_vtotal_min_max(optc, 0, 0);
+       }
+}
+
+
+void optc301_setup_manual_trigger(struct timing_generator *optc)
+{
+       struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+       REG_SET_8(OTG_TRIGA_CNTL, 0,
+                       OTG_TRIGA_SOURCE_SELECT, 21,
+                       OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst,
+                       OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1,
+                       OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0,
+                       OTG_TRIGA_POLARITY_SELECT, 0,
+                       OTG_TRIGA_FREQUENCY_SELECT, 0,
+                       OTG_TRIGA_DELAY, 0,
+                       OTG_TRIGA_CLEAR, 1);
+}
+
+static struct timing_generator_funcs dcn30_tg_funcs = {
+               .validate_timing = optc1_validate_timing,
+               .program_timing = optc1_program_timing,
+               .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
+               .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1,
+               .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2,
+               .program_global_sync = optc1_program_global_sync,
+               .enable_crtc = optc2_enable_crtc,
+               .disable_crtc = optc1_disable_crtc,
+               /* used by enable_timing_synchronization. Not needed for FPGA */
+               .is_counter_moving = optc1_is_counter_moving,
+               .get_position = optc1_get_position,
+               .get_frame_count = optc1_get_vblank_counter,
+               .get_scanoutpos = optc1_get_crtc_scanoutpos,
+               .get_otg_active_size = optc1_get_otg_active_size,
+               .set_early_control = optc1_set_early_control,
+               /* used by enable_timing_synchronization. Not needed for FPGA */
+               .wait_for_state = optc1_wait_for_state,
+               .set_blank_color = optc3_program_blank_color,
+               .did_triggered_reset_occur = optc1_did_triggered_reset_occur,
+               .triplebuffer_lock = optc3_triplebuffer_lock,
+               .triplebuffer_unlock = optc2_triplebuffer_unlock,
+               .enable_reset_trigger = optc1_enable_reset_trigger,
+               .enable_crtc_reset = optc1_enable_crtc_reset,
+               .disable_reset_trigger = optc1_disable_reset_trigger,
+               .lock = optc3_lock,
+               .unlock = optc1_unlock,
+               .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
+               .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
+               .enable_optc_clock = optc1_enable_optc_clock,
+               .set_drr = optc301_set_drr,
+               .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
+               .set_vtotal_min_max = optc3_set_vtotal_min_max,
+               .set_static_screen_control = optc1_set_static_screen_control,
+               .program_stereo = optc1_program_stereo,
+               .is_stereo_left_eye = optc1_is_stereo_left_eye,
+               .tg_init = optc3_tg_init,
+               .is_tg_enabled = optc1_is_tg_enabled,
+               .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred,
+               .clear_optc_underflow = optc1_clear_optc_underflow,
+               .setup_global_swap_lock = NULL,
+               .get_crc = optc1_get_crc,
+               .configure_crc = optc2_configure_crc,
+               .set_dsc_config = optc3_set_dsc_config,
+               .get_dsc_status = optc2_get_dsc_status,
+               .set_dwb_source = NULL,
+               .set_odm_bypass = optc3_set_odm_bypass,
+               .set_odm_combine = optc3_set_odm_combine,
+               .get_optc_source = optc2_get_optc_source,
+               .set_out_mux = optc3_set_out_mux,
+               .set_drr_trigger_window = optc3_set_drr_trigger_window,
+               .set_vtotal_change_limit = optc3_set_vtotal_change_limit,
+               .set_gsl = optc2_set_gsl,
+               .set_gsl_source_select = optc2_set_gsl_source_select,
+               .set_vtg_params = optc1_set_vtg_params,
+               .program_manual_trigger = optc2_program_manual_trigger,
+               .setup_manual_trigger = optc301_setup_manual_trigger,
+               .get_hw_timing = optc1_get_hw_timing,
+               .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear,
+};
+
+void dcn301_timing_generator_init(struct optc *optc1)
+{
+       optc1->base.funcs = &dcn30_tg_funcs;
+
+       optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1;
+       optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1;
+
+       optc1->min_h_blank = 32;
+       optc1->min_v_blank = 3;
+       optc1->min_v_blank_interlace = 5;
+       optc1->min_h_sync_width = 4;
+       optc1->min_v_sync_width = 1;
+}
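
optc301_set_drr() above programs the OTG_V_TOTAL_MIN/MAX window that bounds
how far the timing generator may stretch the vertical total for variable
refresh. Since refresh_rate = pixel_clock / (h_total * v_total), raising the
allowed v_total lowers the achievable refresh floor. A small worked
computation under assumed 1080p-like timings (all numbers illustrative, not
taken from this patch):

    #include <stdio.h>

    #define PIX_CLK_HZ 148500000ull   /* assumed pixel clock */
    #define H_TOTAL    2200u          /* assumed horizontal total */

    static double refresh_hz(unsigned int v_total)
    {
            return (double)PIX_CLK_HZ / ((double)H_TOTAL * v_total);
    }

    int main(void)
    {
            unsigned int v_total_min = 1125;   /* nominal: ~60 Hz */
            unsigned int v_total_max = 1687;   /* stretched: ~40 Hz floor */

            printf("max refresh: %.2f Hz\n", refresh_hz(v_total_min));
            printf("min refresh: %.2f Hz\n", refresh_hz(v_total_max));
            return 0;
    }
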
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
new file mode 100644 (file)
index 0000000..b495856
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_OPTC_DCN301_H__
+#define __DC_OPTC_DCN301_H__
+
+#include "dcn20/dcn20_optc.h"
+#include "dcn30/dcn30_optc.h"
+
+void dcn301_timing_generator_init(struct optc *optc1);
+void optc301_setup_manual_trigger(struct timing_generator *optc);
+void optc301_set_drr(struct timing_generator *optc, const struct drr_params *params);
+
+#endif /* __DC_OPTC_DCN301_H__ */
index 3485fbb..1bee9a4 100644 (file)
@@ -42,7 +42,7 @@
 #include "dcn30/dcn30_hubp.h"
 #include "irq/dcn30/irq_service_dcn30.h"
 #include "dcn30/dcn30_dpp.h"
-#include "dcn30/dcn30_optc.h"
+#include "dcn301/dcn301_optc.h"
 #include "dcn20/dcn20_hwseq.h"
 #include "dcn30/dcn30_hwseq.h"
 #include "dce110/dce110_hw_sequencer.h"
@@ -855,7 +855,7 @@ static struct timing_generator *dcn301_timing_generator_create(
        tgn10->tg_shift = &optc_shift;
        tgn10->tg_mask = &optc_mask;
 
-       dcn30_timing_generator_init(tgn10);
+       dcn301_timing_generator_init(tgn10);
 
        return &tgn10->base;
 }
index 45956ef..131b8b8 100644 (file)
@@ -65,7 +65,7 @@ static const struct dc_debug_options debug_defaults_drv = {
                .timing_trace = false,
                .clock_trace = true,
                .disable_pplib_clock_request = true,
-               .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+               .pipe_split_policy = MPC_SPLIT_AVOID,
                .force_single_disp_pipe_split = false,
                .disable_dcc = DCC_ENABLE,
                .vsr_support = true,
index 65c1d75..01cc679 100644 (file)
@@ -84,7 +84,8 @@ static enum phyd32clk_clock_source get_phy_mux_symclk(
                struct dcn_dccg *dccg_dcn,
                enum phyd32clk_clock_source src)
 {
-       if (dccg_dcn->base.ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
+       if (dccg_dcn->base.ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
+                       dccg_dcn->base.ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
                if (src == PHYD32CLKC)
                        src = PHYD32CLKF;
                if (src == PHYD32CLKD)
index 11e28e0..61ceff6 100644 (file)
@@ -49,7 +49,10 @@ static void dccg32_trigger_dio_fifo_resync(
        uint32_t dispclk_rdivider_value = 0;
 
        REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &dispclk_rdivider_value);
-       REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value);
+
+       /* Not valid for the WDIVIDER to be set to 0 */
+       if (dispclk_rdivider_value != 0)
+               REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value);
 }
 
 static void dccg32_get_pixel_rate_div(
index d9e049e..ed8ddb7 100644 (file)
@@ -295,7 +295,11 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
                pipe = &res_ctx->pipe_ctx[i];
                timing = &pipe->stream->timing;
 
-               pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+               if (pipe->stream->adjust.v_total_min != 0)
+                       pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+               else
+                       pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
+
                pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
                pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, dcn3_14_ip.VBlankNomDefaultUS);
                pipes[pipe_cnt].pipe.dest.vblank_nom = max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width);
index 6841a4b..1cb4022 100644 (file)
@@ -1798,17 +1798,6 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
        return result;
 }
 
-static bool intel_core_rkl_chk(void)
-{
-#if IS_ENABLED(CONFIG_X86_64)
-       struct cpuinfo_x86 *c = &cpu_data(0);
-
-       return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE);
-#else
-       return false;
-#endif
-}
-
 static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
        struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -1835,7 +1824,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
        data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
        data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true;
        data->pcie_dpm_key_disabled =
-               intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
+               !amdgpu_device_pcie_dynamic_switching_supported() ||
+               !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
        /* need to set voltage control types before EVV patching */
        data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE;
        data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE;
index ce41a83..222af2f 100644 (file)
@@ -1581,9 +1581,9 @@ static int smu_disable_dpms(struct smu_context *smu)
 
        /*
         * For SMU 13.0.4/11, PMFW will handle the features disablement properly
-        * for gpu reset case. Driver involvement is unnecessary.
+        * for gpu reset and S0i3 cases. Driver involvement is unnecessary.
         */
-       if (amdgpu_in_reset(adev)) {
+       if (amdgpu_in_reset(adev) || adev->in_s0ix) {
                switch (adev->ip_versions[MP1_HWIP][0]) {
                case IP_VERSION(13, 0, 4):
                case IP_VERSION(13, 0, 11):
index 6a0ac0b..355c156 100644 (file)
@@ -295,5 +295,9 @@ int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu,
                                        uint32_t *size,
                                        uint32_t pptable_id);
 
+int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
+                                    uint32_t pcie_gen_cap,
+                                    uint32_t pcie_width_cap);
+
 #endif
 #endif
index 9cd0051..3bb1839 100644 (file)
@@ -2113,7 +2113,6 @@ static int arcturus_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        mutex_lock(&adev->pm.mutex);
        r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
-       mutex_unlock(&adev->pm.mutex);
        if (r)
                goto fail;
 
@@ -2130,6 +2129,7 @@ static int arcturus_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        r = num_msgs;
 fail:
+       mutex_unlock(&adev->pm.mutex);
        kfree(req);
        return r;
 }
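
Each i2c_xfer fix in this series moves mutex_unlock(&adev->pm.mutex) down
past the fail label, so the result copy-back that follows
smu_cmn_update_table() still runs under the lock and every exit path unlocks
exactly once. The goto-fail shape, reduced to a runnable sketch (a pthread
mutex and stubs stand in for the pm mutex and the SMU calls):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t pm_mutex = PTHREAD_MUTEX_INITIALIZER;

    static int update_table(void) { return 0; }                /* stub */
    static int copy_results(int *out) { *out = 42; return 0; } /* stub */

    static int xfer(int *out)
    {
            int r;

            pthread_mutex_lock(&pm_mutex);
            r = update_table();
            if (r)
                    goto fail;               /* error path unlocks below too */

            r = copy_results(out);           /* must stay under the lock */
    fail:
            pthread_mutex_unlock(&pm_mutex); /* single unlock for all paths */
            return r;
    }

    int main(void)
    {
            int v = 0;

            printf("xfer -> %d, v = %d\n", xfer(&v), v);
            return 0;
    }
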
index c94d825..95f6d82 100644 (file)
@@ -3021,7 +3021,6 @@ static int navi10_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        mutex_lock(&adev->pm.mutex);
        r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
-       mutex_unlock(&adev->pm.mutex);
        if (r)
                goto fail;
 
@@ -3038,6 +3037,7 @@ static int navi10_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        r = num_msgs;
 fail:
+       mutex_unlock(&adev->pm.mutex);
        kfree(req);
        return r;
 }
index f7ed3e6..f0800c0 100644 (file)
@@ -588,7 +588,9 @@ err0_out:
        return -ENOMEM;
 }
 
-static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu)
+static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu,
+                                                          bool use_metrics_v3,
+                                                          bool use_metrics_v2)
 {
        struct smu_table_context *smu_table= &smu->smu_table;
        SmuMetricsExternal_t *metrics_ext =
@@ -596,13 +598,11 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *s
        uint32_t throttler_status = 0;
        int i;
 
-       if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-            (smu->smc_fw_version >= 0x3A4900)) {
+       if (use_metrics_v3) {
                for (i = 0; i < THROTTLER_COUNT; i++)
                        throttler_status |=
                                (metrics_ext->SmuMetrics_V3.ThrottlingPercentage[i] ? 1U << i : 0);
-       } else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
-            (smu->smc_fw_version >= 0x3A4300)) {
+       } else if (use_metrics_v2) {
                for (i = 0; i < THROTTLER_COUNT; i++)
                        throttler_status |=
                                (metrics_ext->SmuMetrics_V2.ThrottlingPercentage[i] ? 1U << i : 0);
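
The locked throttler helper collapses per-throttler percentages into a
bitmask, setting bit i exactly when throttler i reports a nonzero value; the
metrics-version flags are now passed in by the caller instead of being
re-derived from firmware versions at every call. A tiny sketch of the bitmask
construction (count and values invented for the example):

    #include <stdint.h>
    #include <stdio.h>

    #define THROTTLER_COUNT 8   /* invented count for the example */

    int main(void)
    {
            /* Invented percentages: throttlers 1 and 4 are active. */
            uint8_t pct[THROTTLER_COUNT] = { 0, 17, 0, 0, 3, 0, 0, 0 };
            uint32_t status = 0;
            int i;

            for (i = 0; i < THROTTLER_COUNT; i++)
                    status |= pct[i] ? 1U << i : 0;  /* bit i <=> active */

            printf("throttler_status = 0x%02x\n", status);  /* 0x12 */
            return 0;
    }
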
@@ -864,7 +864,7 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
                        metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        case METRICS_THROTTLER_STATUS:
-               *value = sienna_cichlid_get_throttler_status_locked(smu);
+               *value = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2);
                break;
        case METRICS_CURR_FANSPEED:
                *value = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
@@ -1927,12 +1927,16 @@ static int sienna_cichlid_read_sensor(struct smu_context *smu,
                *size = 4;
                break;
        case AMDGPU_PP_SENSOR_GFX_MCLK:
-               ret = sienna_cichlid_get_current_clk_freq_by_table(smu, SMU_UCLK, (uint32_t *)data);
+               ret = sienna_cichlid_get_smu_metrics_data(smu,
+                                                         METRICS_CURR_UCLK,
+                                                         (uint32_t *)data);
                *(uint32_t *)data *= 100;
                *size = 4;
                break;
        case AMDGPU_PP_SENSOR_GFX_SCLK:
-               ret = sienna_cichlid_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data);
+               ret = sienna_cichlid_get_smu_metrics_data(smu,
+                                                         METRICS_AVERAGE_GFXCLK,
+                                                         (uint32_t *)data);
                *(uint32_t *)data *= 100;
                *size = 4;
                break;
@@ -2077,89 +2081,36 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
        return ret;
 }
 
-static void sienna_cichlid_get_override_pcie_settings(struct smu_context *smu,
-                                                     uint32_t *gen_speed_override,
-                                                     uint32_t *lane_width_override)
-{
-       struct amdgpu_device *adev = smu->adev;
-
-       *gen_speed_override = 0xff;
-       *lane_width_override = 0xff;
-
-       switch (adev->pdev->device) {
-       case 0x73A0:
-       case 0x73A1:
-       case 0x73A2:
-       case 0x73A3:
-       case 0x73AB:
-       case 0x73AE:
-               /* Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */
-               *lane_width_override = 6;
-               break;
-       case 0x73E0:
-       case 0x73E1:
-       case 0x73E3:
-               *lane_width_override = 4;
-               break;
-       case 0x7420:
-       case 0x7421:
-       case 0x7422:
-       case 0x7423:
-       case 0x7424:
-               *lane_width_override = 3;
-               break;
-       default:
-               break;
-       }
-}
-
-#define MAX(a, b)      ((a) > (b) ? (a) : (b))
-
 static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
                                         uint32_t pcie_gen_cap,
                                         uint32_t pcie_width_cap)
 {
        struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
        struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
-       uint32_t gen_speed_override, lane_width_override;
-       uint8_t *table_member1, *table_member2;
-       uint32_t min_gen_speed, max_gen_speed;
-       uint32_t min_lane_width, max_lane_width;
-       uint32_t smu_pcie_arg;
+       u32 smu_pcie_arg;
        int ret, i;
 
-       GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
-       GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
+       /* PCIE gen speed and lane width override */
+       if (!amdgpu_device_pcie_dynamic_switching_supported()) {
+               if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap)
+                       pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1];
 
-       sienna_cichlid_get_override_pcie_settings(smu,
-                                                 &gen_speed_override,
-                                                 &lane_width_override);
+               if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap)
+                       pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1];
 
-       /* PCIE gen speed override */
-       if (gen_speed_override != 0xff) {
-               min_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
-               max_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
-       } else {
-               min_gen_speed = MAX(0, table_member1[0]);
-               max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
-               min_gen_speed = min_gen_speed > max_gen_speed ?
-                               max_gen_speed : min_gen_speed;
-       }
-       pcie_table->pcie_gen[0] = min_gen_speed;
-       pcie_table->pcie_gen[1] = max_gen_speed;
-
-       /* PCIE lane width override */
-       if (lane_width_override != 0xff) {
-               min_lane_width = MIN(pcie_width_cap, lane_width_override);
-               max_lane_width = MIN(pcie_width_cap, lane_width_override);
+               /* Force all levels to use the same settings */
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       pcie_table->pcie_gen[i] = pcie_gen_cap;
+                       pcie_table->pcie_lane[i] = pcie_width_cap;
+               }
        } else {
-               min_lane_width = MAX(1, table_member2[0]);
-               max_lane_width = MIN(pcie_width_cap, table_member2[1]);
-               min_lane_width = min_lane_width > max_lane_width ?
-                                max_lane_width : min_lane_width;
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       if (pcie_table->pcie_gen[i] > pcie_gen_cap)
+                               pcie_table->pcie_gen[i] = pcie_gen_cap;
+                       if (pcie_table->pcie_lane[i] > pcie_width_cap)
+                               pcie_table->pcie_lane[i] = pcie_width_cap;
+               }
        }
-       pcie_table->pcie_lane[0] = min_lane_width;
-       pcie_table->pcie_lane[1] = max_lane_width;
 
        for (i = 0; i < NUM_LINK_LEVELS; i++) {
                smu_pcie_arg = (i << 16 |
@@ -3842,7 +3793,6 @@ static int sienna_cichlid_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        mutex_lock(&adev->pm.mutex);
        r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
-       mutex_unlock(&adev->pm.mutex);
        if (r)
                goto fail;
 
@@ -3859,6 +3809,7 @@ static int sienna_cichlid_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        r = num_msgs;
 fail:
+       mutex_unlock(&adev->pm.mutex);
        kfree(req);
        return r;
 }
@@ -4066,7 +4017,7 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
        gpu_metrics->current_dclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
                use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1];
 
-       gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu);
+       gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2);
        gpu_metrics->indep_throttle_status =
                        smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status,
                                                           sienna_cichlid_throttler_map);
index e80f122..ce50ef4 100644 (file)
@@ -1525,7 +1525,6 @@ static int aldebaran_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        mutex_lock(&adev->pm.mutex);
        r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
-       mutex_unlock(&adev->pm.mutex);
        if (r)
                goto fail;
 
@@ -1542,6 +1541,7 @@ static int aldebaran_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        r = num_msgs;
 fail:
+       mutex_unlock(&adev->pm.mutex);
        kfree(req);
        return r;
 }
index 3856da6..9b62b45 100644 (file)
@@ -2424,3 +2424,51 @@ int smu_v13_0_mode1_reset(struct smu_context *smu)
 
        return ret;
 }
+
+int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
+                                    uint32_t pcie_gen_cap,
+                                    uint32_t pcie_width_cap)
+{
+       struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
+       struct smu_13_0_pcie_table *pcie_table =
+                               &dpm_context->dpm_tables.pcie_table;
+       int num_of_levels = pcie_table->num_of_link_levels;
+       uint32_t smu_pcie_arg;
+       int ret, i;
+
+       if (!amdgpu_device_pcie_dynamic_switching_supported()) {
+               if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap)
+                       pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1];
+
+               if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap)
+                       pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1];
+
+               /* Force all levels to use the same settings */
+               for (i = 0; i < num_of_levels; i++) {
+                       pcie_table->pcie_gen[i] = pcie_gen_cap;
+                       pcie_table->pcie_lane[i] = pcie_width_cap;
+               }
+       } else {
+               for (i = 0; i < num_of_levels; i++) {
+                       if (pcie_table->pcie_gen[i] > pcie_gen_cap)
+                               pcie_table->pcie_gen[i] = pcie_gen_cap;
+                       if (pcie_table->pcie_lane[i] > pcie_width_cap)
+                               pcie_table->pcie_lane[i] = pcie_width_cap;
+               }
+       }
+
+       for (i = 0; i < num_of_levels; i++) {
+               smu_pcie_arg = i << 16;
+               smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
+               smu_pcie_arg |= pcie_table->pcie_lane[i];
+
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                     SMU_MSG_OverridePcieParameters,
+                                                     smu_pcie_arg,
+                                                     NULL);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
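
The packing of smu_pcie_arg above (link level in bits 16 and up, PCIe gen code in bits 8-15, lane code in bits 0-7) can be verified with a small standalone sketch; pack_pcie_arg and the sample values are illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t pack_pcie_arg(unsigned int level, uint32_t gen, uint32_t lane)
    {
        return ((uint32_t)level << 16) | (gen << 8) | lane;
    }

    int main(void)
    {
        /* e.g. link level 1, gen code 3, lane code 6 ("x16") */
        uint32_t arg = pack_pcie_arg(1, 3, 6);

        printf("smu_pcie_arg = 0x%06x\n", (unsigned int)arg); /* 0x010306 */
        return 0;
    }
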
index 124287c..0fb6be1 100644 (file)
@@ -332,10 +332,13 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
                table_context->power_play_table;
        struct smu_baco_context *smu_baco = &smu->smu_baco;
        PPTable_t *pptable = smu->smu_table.driver_pptable;
+#if 0
+       PPTable_t *pptable = smu->smu_table.driver_pptable;
        const OverDriveLimits_t * const overdrive_upperlimits =
                                &pptable->SkuTable.OverDriveLimitsBasicMax;
        const OverDriveLimits_t * const overdrive_lowerlimits =
                                &pptable->SkuTable.OverDriveLimitsMin;
+#endif
 
        if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC)
                smu->dc_controlled_by_gpio = true;
@@ -347,18 +350,30 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
        if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
                smu_baco->maco_support = true;
 
+       /*
+        * We are in the transition to a new OD mechanism.
+        * Disable the OD feature support for SMU13 temporarily.
+        * TODO: revert this once the new OD mechanism is online.
+        */
+#if 0
        if (!overdrive_lowerlimits->FeatureCtrlMask ||
            !overdrive_upperlimits->FeatureCtrlMask)
                smu->od_enabled = false;
 
-       table_context->thermal_controller_type =
-               powerplay_table->thermal_controller_type;
-
        /*
         * Instead of having its own buffer space and getting overdrive_table copied,
         * smu->od_settings just points to the actual overdrive_table.
         */
        smu->od_settings = &powerplay_table->overdrive_table;
+#else
+       smu->od_enabled = false;
+#endif
+
+       table_context->thermal_controller_type =
+               powerplay_table->thermal_controller_type;
+
+       smu->adev->pm.no_fan =
+               !(pptable->SkuTable.FeaturesToRun[0] & (1 << FEATURE_FAN_CONTROL_BIT));
 
        return 0;
 }
@@ -1140,7 +1155,6 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
                (OverDriveTableExternal_t *)smu->smu_table.overdrive_table;
        struct smu_13_0_dpm_table *single_dpm_table;
        struct smu_13_0_pcie_table *pcie_table;
-       const int link_width[] = {0, 1, 2, 4, 8, 12, 16};
        uint32_t gen_speed, lane_width;
        int i, curr_freq, size = 0;
        int32_t min_value, max_value;
@@ -1256,7 +1270,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
                                        (pcie_table->pcie_lane[i] == 6) ? "x16" : "",
                                        pcie_table->clk_freq[i],
                                        (gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
-                                       (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
+                                       (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
                                        "*" : "");
                break;
 
@@ -1645,37 +1659,6 @@ static int smu_v13_0_0_force_clk_levels(struct smu_context *smu,
        return ret;
 }
 
-static int smu_v13_0_0_update_pcie_parameters(struct smu_context *smu,
-                                             uint32_t pcie_gen_cap,
-                                             uint32_t pcie_width_cap)
-{
-       struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
-       struct smu_13_0_pcie_table *pcie_table =
-                               &dpm_context->dpm_tables.pcie_table;
-       uint32_t smu_pcie_arg;
-       int ret, i;
-
-       for (i = 0; i < pcie_table->num_of_link_levels; i++) {
-               if (pcie_table->pcie_gen[i] > pcie_gen_cap)
-                       pcie_table->pcie_gen[i] = pcie_gen_cap;
-               if (pcie_table->pcie_lane[i] > pcie_width_cap)
-                       pcie_table->pcie_lane[i] = pcie_width_cap;
-
-               smu_pcie_arg = i << 16;
-               smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
-               smu_pcie_arg |= pcie_table->pcie_lane[i];
-
-               ret = smu_cmn_send_smc_msg_with_param(smu,
-                                                     SMU_MSG_OverridePcieParameters,
-                                                     smu_pcie_arg,
-                                                     NULL);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
 static const struct smu_temperature_range smu13_thermal_policy[] = {
        {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
        { 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
@@ -1765,7 +1748,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu,
        gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency;
        gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency;
 
-       gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK];
+       gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency;
        gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK];
        gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK];
        gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0];
@@ -2320,7 +2303,6 @@ static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        mutex_lock(&adev->pm.mutex);
        r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
-       mutex_unlock(&adev->pm.mutex);
        if (r)
                goto fail;
 
@@ -2337,6 +2319,7 @@ static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        r = num_msgs;
 fail:
+       mutex_unlock(&adev->pm.mutex);
        kfree(req);
        return r;
 }
@@ -2654,7 +2637,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
        .feature_is_enabled = smu_cmn_feature_is_enabled,
        .print_clk_levels = smu_v13_0_0_print_clk_levels,
        .force_clk_levels = smu_v13_0_0_force_clk_levels,
-       .update_pcie_parameters = smu_v13_0_0_update_pcie_parameters,
+       .update_pcie_parameters = smu_v13_0_update_pcie_parameters,
        .get_thermal_temperature_range = smu_v13_0_0_get_thermal_temperature_range,
        .register_irq_handler = smu_v13_0_register_irq_handler,
        .enable_thermal_alert = smu_v13_0_enable_thermal_alert,
index 6ef1225..dc6104a 100644 (file)
 #define EPSILON 1
 
 #define smnPCIE_ESM_CTRL 0x193D0
-#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1ab40288
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288
 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
 #define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
+#define MAX_LINK_WIDTH 6
 
 static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
        MSG_MAP(TestMessage,                         PPSMC_MSG_TestMessage,                     0),
@@ -708,16 +709,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
                *value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
                break;
        case METRICS_TEMPERATURE_HOTSPOT:
-               *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
+               *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) *
+                        SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        case METRICS_TEMPERATURE_MEM:
-               *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
+               *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) *
+                        SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        /* This is the max of all VRs and not just SOC VR.
         * No need to define another data type for the same.
         */
        case METRICS_TEMPERATURE_VRSOC:
-               *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
+               *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) *
+                        SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
                break;
        default:
                *value = UINT_MAX;
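
The scaling added above converts whole degrees to the millidegree units expected by consumers of these sensors. A standalone arithmetic sketch, assuming the conventional factor of 1000 behind SMU_TEMPERATURE_UNITS_PER_CENTIGRADES:

    #include <stdio.h>

    /* assumed value of the kernel's SMU_TEMPERATURE_UNITS_PER_CENTIGRADES */
    #define TEMPERATURE_UNITS_PER_CENTIGRADES 1000

    int main(void)
    {
        int degrees_c = 67;     /* e.g. a hotspot reading in whole degrees */
        int value = degrees_c * TEMPERATURE_UNITS_PER_CENTIGRADES;

        printf("%d C -> %d millidegrees\n", degrees_c, value);
        return 0;
    }
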
@@ -1763,7 +1767,6 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        mutex_lock(&adev->pm.mutex);
        r = smu_v13_0_6_request_i2c_xfer(smu, req);
-       mutex_unlock(&adev->pm.mutex);
        if (r)
                goto fail;
 
@@ -1780,6 +1783,7 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
        }
        r = num_msgs;
 fail:
+       mutex_unlock(&adev->pm.mutex);
        kfree(req);
        return r;
 }
@@ -1966,6 +1970,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
        struct amdgpu_device *adev = smu->adev;
        int ret = 0, inst0, xcc0;
        MetricsTable_t *metrics;
+       u16 link_width_level;
 
        inst0 = adev->sdma.instance[0].aid_id;
        xcc0 = GET_INST(GC, 0);
@@ -1993,9 +1998,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
 
        gpu_metrics->average_socket_power =
                SMUQ10_TO_UINT(metrics->SocketPower);
-       /* Energy is reported in 15.625mJ units */
-       gpu_metrics->energy_accumulator =
-               SMUQ10_TO_UINT(metrics->SocketEnergyAcc);
+       /* Energy counter reported in 15.259uJ (2^-16) units */
+       gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
 
        gpu_metrics->current_gfxclk =
                SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]);
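
A quick arithmetic sketch of the new energy-accumulator units above, where each raw counter tick is 2^-16 J (~15.259 uJ); the tick count is made up for illustration:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t acc = 65536000;            /* raw counter ticks (made up) */
        double joules = acc / 65536.0;      /* 2^-16 J per tick */

        printf("%llu ticks = %.3f J\n", (unsigned long long)acc, joules);
        return 0;
    }
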
@@ -2017,8 +2021,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
        gpu_metrics->throttle_status = 0;
 
        if (!(adev->flags & AMD_IS_APU)) {
+               link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
+               if (link_width_level > MAX_LINK_WIDTH)
+                       link_width_level = 0;
+
                gpu_metrics->pcie_link_width =
-                       smu_v13_0_6_get_current_pcie_link_width_level(smu);
+                       DECODE_LANE_WIDTH(link_width_level);
                gpu_metrics->pcie_link_speed =
                        smu_v13_0_6_get_current_pcie_link_speed(smu);
        }
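
The clamping above treats the raw link-width level as a table index and forces out-of-range values to 0 before decoding. A standalone sketch using the {0, 1, 2, 4, 8, 12, 16} lane table seen elsewhere in this series; decode_lane_width is an illustrative stand-in for DECODE_LANE_WIDTH:

    #include <stdio.h>

    #define MAX_LINK_WIDTH 6

    static const int link_width[] = {0, 1, 2, 4, 8, 12, 16};

    static int decode_lane_width(unsigned int level)
    {
        if (level > MAX_LINK_WIDTH)     /* bogus register value */
            level = 0;
        return link_width[level];
    }

    int main(void)
    {
        printf("level 6 -> x%d\n", decode_lane_width(6));
        printf("level 9 -> x%d\n", decode_lane_width(9)); /* clamped to 0 */
        return 0;
    }
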
index cda4e81..62f2886 100644 (file)
@@ -323,10 +323,12 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
        struct smu_baco_context *smu_baco = &smu->smu_baco;
        PPTable_t *smc_pptable = table_context->driver_pptable;
        BoardTable_t *BoardTable = &smc_pptable->BoardTable;
+#if 0
        const OverDriveLimits_t * const overdrive_upperlimits =
                                &smc_pptable->SkuTable.OverDriveLimitsBasicMax;
        const OverDriveLimits_t * const overdrive_lowerlimits =
                                &smc_pptable->SkuTable.OverDriveLimitsMin;
+#endif
 
        if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
                smu->dc_controlled_by_gpio = true;
@@ -338,18 +340,22 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
        if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled))
                smu_baco->maco_support = true;
 
+#if 0
        if (!overdrive_lowerlimits->FeatureCtrlMask ||
            !overdrive_upperlimits->FeatureCtrlMask)
                smu->od_enabled = false;
 
-       table_context->thermal_controller_type =
-               powerplay_table->thermal_controller_type;
-
        /*
         * Instead of having its own buffer space and getting overdrive_table copied,
         * smu->od_settings just points to the actual overdrive_table.
         */
        smu->od_settings = &powerplay_table->overdrive_table;
+#else
+       smu->od_enabled = false;
+#endif
+
+       table_context->thermal_controller_type =
+               powerplay_table->thermal_controller_type;
 
        return 0;
 }
@@ -949,7 +955,7 @@ static int smu_v13_0_7_read_sensor(struct smu_context *smu,
                break;
        case AMDGPU_PP_SENSOR_GFX_MCLK:
                ret = smu_v13_0_7_get_smu_metrics_data(smu,
-                                                      METRICS_AVERAGE_UCLK,
+                                                      METRICS_CURR_UCLK,
                                                       (uint32_t *)data);
                *(uint32_t *)data *= 100;
                *size = 4;
@@ -1635,37 +1641,6 @@ static int smu_v13_0_7_force_clk_levels(struct smu_context *smu,
        return ret;
 }
 
-static int smu_v13_0_7_update_pcie_parameters(struct smu_context *smu,
-                                             uint32_t pcie_gen_cap,
-                                             uint32_t pcie_width_cap)
-{
-       struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
-       struct smu_13_0_pcie_table *pcie_table =
-                               &dpm_context->dpm_tables.pcie_table;
-       uint32_t smu_pcie_arg;
-       int ret, i;
-
-       for (i = 0; i < pcie_table->num_of_link_levels; i++) {
-               if (pcie_table->pcie_gen[i] > pcie_gen_cap)
-                       pcie_table->pcie_gen[i] = pcie_gen_cap;
-               if (pcie_table->pcie_lane[i] > pcie_width_cap)
-                       pcie_table->pcie_lane[i] = pcie_width_cap;
-
-               smu_pcie_arg = i << 16;
-               smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
-               smu_pcie_arg |= pcie_table->pcie_lane[i];
-
-               ret = smu_cmn_send_smc_msg_with_param(smu,
-                                                     SMU_MSG_OverridePcieParameters,
-                                                     smu_pcie_arg,
-                                                     NULL);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
-}
-
 static const struct smu_temperature_range smu13_thermal_policy[] =
 {
        {-273150,  99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
@@ -2234,7 +2209,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
        .feature_is_enabled = smu_cmn_feature_is_enabled,
        .print_clk_levels = smu_v13_0_7_print_clk_levels,
        .force_clk_levels = smu_v13_0_7_force_clk_levels,
-       .update_pcie_parameters = smu_v13_0_7_update_pcie_parameters,
+       .update_pcie_parameters = smu_v13_0_update_pcie_parameters,
        .get_thermal_temperature_range = smu_v13_0_7_get_thermal_temperature_range,
        .register_irq_handler = smu_v13_0_register_irq_handler,
        .enable_thermal_alert = smu_v13_0_enable_thermal_alert,
index 3943e89..e40a95e 100644 (file)
@@ -209,10 +209,6 @@ void armada_fbdev_setup(struct drm_device *dev)
                goto err_drm_client_init;
        }
 
-       ret = armada_fbdev_client_hotplug(&fbh->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&fbh->client);
 
        return;
index 504d51c..aadb396 100644 (file)
@@ -2517,9 +2517,11 @@ static irqreturn_t it6505_int_threaded_handler(int unused, void *data)
        };
        int int_status[3], i;
 
-       if (it6505->enable_drv_hold || pm_runtime_get_if_in_use(dev) <= 0)
+       if (it6505->enable_drv_hold || !it6505->powered)
                return IRQ_HANDLED;
 
+       pm_runtime_get_sync(dev);
+
        int_status[0] = it6505_read(it6505, INT_STATUS_01);
        int_status[1] = it6505_read(it6505, INT_STATUS_02);
        int_status[2] = it6505_read(it6505, INT_STATUS_03);
index 5163e52..9663601 100644 (file)
@@ -774,9 +774,7 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611,
        dsi->lanes = 4;
        dsi->format = MIPI_DSI_FMT_RGB888;
        dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
-                         MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA |
-                         MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP |
-                         MIPI_DSI_MODE_NO_EOT_PACKET;
+                         MIPI_DSI_MODE_VIDEO_HSE;
 
        ret = devm_mipi_dsi_attach(dev, dsi);
        if (ret < 0) {
index 043b810..73ec607 100644 (file)
@@ -1386,6 +1386,18 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi)
        disable_irq(dsi->irq);
 }
 
+static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable)
+{
+       u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
+
+       if (enable)
+               reg |= DSIM_FORCE_STOP_STATE;
+       else
+               reg &= ~DSIM_FORCE_STOP_STATE;
+
+       samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+}
+
 static int samsung_dsim_init(struct samsung_dsim *dsi)
 {
        const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
@@ -1445,15 +1457,12 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge,
                                       struct drm_bridge_state *old_bridge_state)
 {
        struct samsung_dsim *dsi = bridge_to_dsi(bridge);
-       u32 reg;
 
        if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
                samsung_dsim_set_display_mode(dsi);
                samsung_dsim_set_display_enable(dsi, true);
        } else {
-               reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-               reg &= ~DSIM_FORCE_STOP_STATE;
-               samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+               samsung_dsim_set_stop_state(dsi, false);
        }
 
        dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
@@ -1463,16 +1472,12 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge,
                                        struct drm_bridge_state *old_bridge_state)
 {
        struct samsung_dsim *dsi = bridge_to_dsi(bridge);
-       u32 reg;
 
        if (!(dsi->state & DSIM_STATE_ENABLED))
                return;
 
-       if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
-               reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
-               reg |= DSIM_FORCE_STOP_STATE;
-               samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
-       }
+       if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
+               samsung_dsim_set_stop_state(dsi, true);
 
        dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
 }
@@ -1775,6 +1780,8 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host,
        if (ret)
                return ret;
 
+       samsung_dsim_set_stop_state(dsi, false);
+
        ret = mipi_dsi_create_packet(&xfer.packet, msg);
        if (ret < 0)
                return ret;
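
The samsung-dsim change above folds a duplicated read-modify-write of the escape-mode register into one helper taking a bool. A standalone model with a fake register; the bit position and names are illustrative:

    #include <stdio.h>

    #define FORCE_STOP_STATE (1u << 0)      /* illustrative bit position */

    static unsigned int escmode_reg;        /* stand-in for DSIM_ESCMODE_REG */

    static void set_stop_state(int enable)
    {
        unsigned int reg = escmode_reg;     /* read */

        if (enable)
            reg |= FORCE_STOP_STATE;        /* modify */
        else
            reg &= ~FORCE_STOP_STATE;

        escmode_reg = reg;                  /* write back */
    }

    int main(void)
    {
        set_stop_state(1);
        printf("after enable:  0x%x\n", escmode_reg);
        set_stop_state(0);
        printf("after disable: 0x%x\n", escmode_reg);
        return 0;
    }
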
index 9d6dcaf..7b66f36 100644 (file)
@@ -1426,9 +1426,9 @@ void dw_hdmi_set_high_tmds_clock_ratio(struct dw_hdmi *hdmi,
        /* Control for TMDS Bit Period/TMDS Clock-Period Ratio */
        if (dw_hdmi_support_scdc(hdmi, display)) {
                if (mtmdsclock > HDMI14_MAX_TMDSCLK)
-                       drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 1);
+                       drm_scdc_set_high_tmds_clock_ratio(hdmi->curr_conn, 1);
                else
-                       drm_scdc_set_high_tmds_clock_ratio(&hdmi->connector, 0);
+                       drm_scdc_set_high_tmds_clock_ratio(hdmi->curr_conn, 0);
        }
 }
 EXPORT_SYMBOL_GPL(dw_hdmi_set_high_tmds_clock_ratio);
@@ -2116,7 +2116,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
                                min_t(u8, bytes, SCDC_MIN_SOURCE_VERSION));
 
                        /* Enabled Scrambling in the Sink */
-                       drm_scdc_set_scrambling(&hdmi->connector, 1);
+                       drm_scdc_set_scrambling(hdmi->curr_conn, 1);
 
                        /*
                         * To activate the scrambler feature, you must ensure
@@ -2132,7 +2132,7 @@ static void hdmi_av_composer(struct dw_hdmi *hdmi,
                        hdmi_writeb(hdmi, 0, HDMI_FC_SCRAMBLER_CTRL);
                        hdmi_writeb(hdmi, (u8)~HDMI_MC_SWRSTZ_TMDSSWRST_REQ,
                                    HDMI_MC_SWRSTZ);
-                       drm_scdc_set_scrambling(&hdmi->connector, 0);
+                       drm_scdc_set_scrambling(hdmi->curr_conn, 0);
                }
        }
 
@@ -3553,6 +3553,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev,
        hdmi->bridge.ops = DRM_BRIDGE_OP_DETECT | DRM_BRIDGE_OP_EDID
                         | DRM_BRIDGE_OP_HPD;
        hdmi->bridge.interlace_allowed = true;
+       hdmi->bridge.ddc = hdmi->ddc;
 #ifdef CONFIG_OF
        hdmi->bridge.of_node = pdev->dev.of_node;
 #endif
index c499a14..f448b90 100644 (file)
  * @pwm_refclk_freq: Cache for the reference clock input to the PWM.
  */
 struct ti_sn65dsi86 {
-       struct auxiliary_device         bridge_aux;
-       struct auxiliary_device         gpio_aux;
-       struct auxiliary_device         aux_aux;
-       struct auxiliary_device         pwm_aux;
+       struct auxiliary_device         *bridge_aux;
+       struct auxiliary_device         *gpio_aux;
+       struct auxiliary_device         *aux_aux;
+       struct auxiliary_device         *pwm_aux;
 
        struct device                   *dev;
        struct regmap                   *regmap;
@@ -468,27 +468,34 @@ static void ti_sn65dsi86_delete_aux(void *data)
        auxiliary_device_delete(data);
 }
 
-/*
- * AUX bus docs say that a non-NULL release is mandatory, but it makes no
- * sense for the model used here where all of the aux devices are allocated
- * in the single shared structure. We'll use this noop as a workaround.
- */
-static void ti_sn65dsi86_noop(struct device *dev) {}
+static void ti_sn65dsi86_aux_device_release(struct device *dev)
+{
+       struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev);
+
+       kfree(aux);
+}
 
 static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata,
-                                      struct auxiliary_device *aux,
+                                      struct auxiliary_device **aux_out,
                                       const char *name)
 {
        struct device *dev = pdata->dev;
+       struct auxiliary_device *aux;
        int ret;
 
+       aux = kzalloc(sizeof(*aux), GFP_KERNEL);
+       if (!aux)
+               return -ENOMEM;
+
        aux->name = name;
        aux->dev.parent = dev;
-       aux->dev.release = ti_sn65dsi86_noop;
+       aux->dev.release = ti_sn65dsi86_aux_device_release;
        device_set_of_node_from_dev(&aux->dev, dev);
        ret = auxiliary_device_init(aux);
-       if (ret)
+       if (ret) {
+               kfree(aux);
                return ret;
+       }
        ret = devm_add_action_or_reset(dev, ti_sn65dsi86_uninit_aux, aux);
        if (ret)
                return ret;
@@ -497,6 +504,8 @@ static int ti_sn65dsi86_add_aux_device(struct ti_sn65dsi86 *pdata,
        if (ret)
                return ret;
        ret = devm_add_action_or_reset(dev, ti_sn65dsi86_delete_aux, aux);
+       if (!ret)
+               *aux_out = aux;
 
        return ret;
 }
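
The ti-sn65dsi86 fix above heap-allocates each auxiliary device so a real release callback can free it, replacing the no-op release on devices embedded in a shared struct. A plain-C model of the ownership pattern; aux_device and its helpers are stand-ins, not the kernel auxiliary bus API:

    #include <stdio.h>
    #include <stdlib.h>

    struct aux_device {
        const char *name;
        void (*release)(struct aux_device *aux);
    };

    static void aux_device_release(struct aux_device *aux)
    {
        printf("releasing %s\n", aux->name);
        free(aux);              /* safe: the object owns its allocation */
    }

    static struct aux_device *add_aux_device(const char *name)
    {
        struct aux_device *aux = calloc(1, sizeof(*aux));

        if (!aux)
            return NULL;
        aux->name = name;
        aux->release = aux_device_release;
        return aux;
    }

    int main(void)
    {
        struct aux_device *aux = add_aux_device("gpio");

        if (aux)
            aux->release(aux);
        return 0;
    }
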
index 2c45456..c277b19 100644 (file)
@@ -140,6 +140,12 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
        if (!state->planes)
                goto fail;
 
+       /*
+        * Because drm_atomic_state can be committed asynchronously we need our
+        * own reference and cannot rely on the one implied by drm_file in the
+        * ioctl call.
+        */
+       drm_dev_get(dev);
        state->dev = dev;
 
        drm_dbg_atomic(dev, "Allocated atomic state %p\n", state);
@@ -299,7 +305,8 @@ EXPORT_SYMBOL(drm_atomic_state_clear);
 void __drm_atomic_state_free(struct kref *ref)
 {
        struct drm_atomic_state *state = container_of(ref, typeof(*state), ref);
-       struct drm_mode_config *config = &state->dev->mode_config;
+       struct drm_device *dev = state->dev;
+       struct drm_mode_config *config = &dev->mode_config;
 
        drm_atomic_state_clear(state);
 
@@ -311,6 +318,8 @@ void __drm_atomic_state_free(struct kref *ref)
                drm_atomic_state_default_release(state);
                kfree(state);
        }
+
+       drm_dev_put(dev);
 }
 EXPORT_SYMBOL(__drm_atomic_state_free);
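
The pairing above (drm_dev_get() in init, drm_dev_put() at the end of the final free) can be modeled with a plain counter, so an asynchronously committed state keeps the device alive past the ioctl. Illustrative model, not the kernel kref API:

    #include <stdio.h>

    struct device { int refcount; };
    struct state  { struct device *dev; };

    static void dev_get(struct device *dev) { dev->refcount++; }

    static void dev_put(struct device *dev)
    {
        if (--dev->refcount == 0)
            printf("device freed\n");
    }

    static void state_init(struct state *st, struct device *dev)
    {
        dev_get(dev);           /* the state holds its own reference */
        st->dev = dev;
    }

    static void state_free(struct state *st)
    {
        struct device *dev = st->dev;

        /* ...release the state's resources first... */
        dev_put(dev);           /* dropped last, as in the free path above */
    }

    int main(void)
    {
        struct device dev = { .refcount = 1 };  /* owner's reference */
        struct state st;

        state_init(&st, &dev);
        state_free(&st);
        dev_put(&dev);          /* owner drops its reference last */
        return 0;
    }
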
 
index f6292ba..037e36f 100644 (file)
@@ -122,13 +122,34 @@ EXPORT_SYMBOL(drm_client_init);
  * drm_client_register() it is no longer permissible to call drm_client_release()
  * directly (outside the unregister callback); instead, cleanup will happen
  * automatically on driver unload.
+ *
+ * Registering a client generates a hotplug event that allows the client
+ * to set up its display from pre-existing outputs. The client must have
+ * initialized its state to be able to handle the hotplug event successfully.
  */
 void drm_client_register(struct drm_client_dev *client)
 {
        struct drm_device *dev = client->dev;
+       int ret;
 
        mutex_lock(&dev->clientlist_mutex);
        list_add(&client->list, &dev->clientlist);
+
+       if (client->funcs && client->funcs->hotplug) {
+               /*
+                * Perform an initial hotplug event to pick up the
+                * display configuration for the client. This step
+                * has to be performed *after* registering the client
+                * in the list of clients, or a concurrent hotplug
+                * event might be lost, leaving the display off.
+                *
+                * Hold the clientlist_mutex as for a regular hotplug
+                * event.
+                */
+               ret = client->funcs->hotplug(client);
+               if (ret)
+                       drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
+       }
        mutex_unlock(&dev->clientlist_mutex);
 }
 EXPORT_SYMBOL(drm_client_register);
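
A pthread-based model of the registration ordering above: the client is added to the list and the initial hotplug runs under the same mutex, so a concurrent hotplug event can neither be lost nor race the setup. Names are illustrative only:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t clientlist_mutex = PTHREAD_MUTEX_INITIALIZER;

    struct client {
        int (*hotplug)(struct client *client);
        int registered;
    };

    static void client_register(struct client *client)
    {
        pthread_mutex_lock(&clientlist_mutex);
        client->registered = 1;             /* "add to the list" first */
        if (client->hotplug) {
            int ret = client->hotplug(client);

            if (ret)
                printf("client hotplug ret=%d\n", ret);
        }
        pthread_mutex_unlock(&clientlist_mutex);
    }

    static int demo_hotplug(struct client *client)
    {
        printf("initial hotplug, registered=%d\n", client->registered);
        return 0;
    }

    int main(void)
    {
        struct client c = { .hotplug = demo_hotplug };

        client_register(&c);
        return 0;
    }
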
index 1b12a3c..871e4e2 100644 (file)
@@ -311,6 +311,9 @@ static bool drm_client_target_cloned(struct drm_device *dev,
        can_clone = true;
        dmt_mode = drm_mode_find_dmt(dev, 1024, 768, 60, false);
 
+       if (!dmt_mode)
+               goto fail;
+
        for (i = 0; i < connector_count; i++) {
                if (!enabled[i])
                        continue;
@@ -326,11 +329,13 @@ static bool drm_client_target_cloned(struct drm_device *dev,
                if (!modes[i])
                        can_clone = false;
        }
+       kfree(dmt_mode);
 
        if (can_clone) {
                DRM_DEBUG_KMS("can clone using 1024x768\n");
                return true;
        }
+fail:
        DRM_INFO("kms: can't enable cloning when we probably wanted to.\n");
        return false;
 }
@@ -862,6 +867,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
                                break;
                        }
 
+                       kfree(modeset->mode);
                        modeset->mode = drm_mode_duplicate(dev, mode);
                        drm_connector_get(connector);
                        modeset->connectors[modeset->num_connectors++] = connector;
index e0dbd91..1f47096 100644 (file)
@@ -3456,6 +3456,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto
                            connector->base.id, connector->name);
                return NULL;
        }
+       if (!(pt->misc & DRM_EDID_PT_SEPARATE_SYNC)) {
+               drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Composite sync not supported\n",
+                           connector->base.id, connector->name);
+       }
 
        /* it is incorrect if hsync/vsync width is zero */
        if (!hsync_pulse_width || !vsync_pulse_width) {
@@ -3502,27 +3506,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto
        if (info->quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
                mode->flags |= DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC;
        } else {
-               switch (pt->misc & DRM_EDID_PT_SYNC_MASK) {
-               case DRM_EDID_PT_ANALOG_CSYNC:
-               case DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC:
-                       drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Analog composite sync!\n",
-                                   connector->base.id, connector->name);
-                       mode->flags |= DRM_MODE_FLAG_CSYNC | DRM_MODE_FLAG_NCSYNC;
-                       break;
-               case DRM_EDID_PT_DIGITAL_CSYNC:
-                       drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Digital composite sync!\n",
-                                   connector->base.id, connector->name);
-                       mode->flags |= DRM_MODE_FLAG_CSYNC;
-                       mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
-                               DRM_MODE_FLAG_PCSYNC : DRM_MODE_FLAG_NCSYNC;
-                       break;
-               case DRM_EDID_PT_DIGITAL_SEPARATE_SYNC:
-                       mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
-                               DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
-                       mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
-                               DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
-                       break;
-               }
+               mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
+                       DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
+               mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
+                       DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
        }
 
 set_size:
index d86773f..f353daf 100644 (file)
@@ -217,7 +217,7 @@ static const struct drm_client_funcs drm_fbdev_dma_client_funcs = {
  * drm_fbdev_dma_setup() - Setup fbdev emulation for GEM DMA helpers
  * @dev: DRM device
  * @preferred_bpp: Preferred bits per pixel for the device.
- *                 @dev->mode_config.preferred_depth is used if this is zero.
+ *                 32 is used if this is zero.
  *
  * This function sets up fbdev emulation for GEM DMA drivers that support
  * dumb buffers with a virtual address and that can be mmap'ed.
@@ -252,10 +252,6 @@ void drm_fbdev_dma_setup(struct drm_device *dev, unsigned int preferred_bpp)
                goto err_drm_client_init;
        }
 
-       ret = drm_fbdev_dma_client_hotplug(&fb_helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&fb_helper->client);
 
        return;
index 98ae703..b9343fb 100644 (file)
@@ -339,10 +339,6 @@ void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp)
                goto err_drm_client_init;
        }
 
-       ret = drm_fbdev_generic_client_hotplug(&fb_helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&fb_helper->client);
 
        return;
index 4ea6507..baaf0e0 100644 (file)
@@ -623,7 +623,13 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
        int ret;
 
        if (obj->import_attach) {
+               /* Reset both vm_ops and vm_private_data, so we don't end up with
+                * vm_ops pointing to our implementation if the dma-buf backend
+                * doesn't set those fields.
+                */
                vma->vm_private_data = NULL;
+               vma->vm_ops = NULL;
+
                ret = dma_buf_mmap(obj->dma_buf, vma, 0);
 
                /* Drop the reference drm_gem_mmap_obj() acquired.*/
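
A small model of the delegation fix above: both vm_ops and vm_private_data are cleared before the exporter's mmap runs, so stale importer callbacks cannot fire if the exporter leaves those fields unset. Plain C, not the kernel structures:

    #include <stdio.h>
    #include <stddef.h>

    struct vma {
        void *vm_private_data;
        void (*vm_ops)(void);
    };

    static void importer_ops(void) { printf("stale importer handler\n"); }

    static int exporter_mmap(struct vma *vma)
    {
        /* the exporter may or may not install its own ops */
        return 0;
    }

    static int mmap_imported(struct vma *vma)
    {
        vma->vm_private_data = NULL;    /* drop the importer's state */
        vma->vm_ops = NULL;             /* and its callbacks */
        return exporter_mmap(vma);
    }

    int main(void)
    {
        struct vma vma = { .vm_private_data = &vma, .vm_ops = importer_ops };

        mmap_imported(&vma);
        printf("vm_ops cleared: %d\n", vma.vm_ops == NULL);
        return 0;
    }
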
index 2fb9bf9..3f47948 100644 (file)
@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
 }
 
 #define DRM_OUTPUT_POLL_PERIOD (10*HZ)
+static void reschedule_output_poll_work(struct drm_device *dev)
+{
+       unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
+
+       if (dev->mode_config.delayed_event)
+               /*
+                * FIXME:
+                *
+                * Use short (1s) delay to handle the initial delayed event.
+                * This delay should not be needed, but Optimus/nouveau will
+                * fail in a mysterious way if the delayed event is handled as
+                * soon as possible like it is done in
+                * drm_helper_probe_single_connector_modes() in case the poll
+                * was enabled before.
+                */
+               delay = HZ;
+
+       schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+}
+
 /**
  * drm_kms_helper_poll_enable - re-enable output polling.
  * @dev: drm_device
@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
  */
 void drm_kms_helper_poll_enable(struct drm_device *dev)
 {
-       bool poll = false;
-       unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
-
        if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll ||
            dev->mode_config.poll_running)
                return;
 
-       poll = drm_kms_helper_enable_hpd(dev);
-
-       if (dev->mode_config.delayed_event) {
-               /*
-                * FIXME:
-                *
-                * Use short (1s) delay to handle the initial delayed event.
-                * This delay should not be needed, but Optimus/nouveau will
-                * fail in a mysterious way if the delayed event is handled as
-                * soon as possible like it is done in
-                * drm_helper_probe_single_connector_modes() in case the poll
-                * was enabled before.
-                */
-               poll = true;
-               delay = HZ;
-       }
-
-       if (poll)
-               schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+       if (drm_kms_helper_enable_hpd(dev) ||
+           dev->mode_config.delayed_event)
+               reschedule_output_poll_work(dev);
 
        dev->mode_config.poll_running = true;
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
+/**
+ * drm_kms_helper_poll_reschedule - reschedule the output polling work
+ * @dev: drm_device
+ *
+ * This function reschedules the output polling work after polling for a
+ * connector has been enabled.
+ *
+ * Drivers must call this helper after enabling polling for a connector by
+ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags
+ * in drm_connector::polled. Note that disabling polling by clearing these
+ * flags for a connector will stop the output polling work automatically once
+ * polling has been disabled for all other connectors as well.
+ *
+ * The function can be called only after polling has been enabled by calling
+ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable().
+ */
+void drm_kms_helper_poll_reschedule(struct drm_device *dev)
+{
+       if (dev->mode_config.poll_running)
+               reschedule_output_poll_work(dev);
+}
+EXPORT_SYMBOL(drm_kms_helper_poll_reschedule);
+
 static enum drm_connector_status
 drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
 {
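
A standalone model of the rescheduling logic introduced above: one helper picks the delay (1s while an initial delayed event is pending, otherwise the normal 10s poll period), and the reschedule entry point only acts once polling is already running. schedule_delayed_work() is simulated with a printf; names are illustrative:

    #include <stdio.h>

    #define HZ                 100
    #define OUTPUT_POLL_PERIOD (10 * HZ)

    struct mode_config {
        int delayed_event;
        int poll_running;
    };

    static void schedule_poll_work(unsigned long delay)
    {
        printf("poll work scheduled in %lu jiffies\n", delay);
    }

    static void reschedule_output_poll_work(struct mode_config *cfg)
    {
        unsigned long delay = OUTPUT_POLL_PERIOD;

        if (cfg->delayed_event)
            delay = HZ;         /* handle the initial delayed event soon */

        schedule_poll_work(delay);
    }

    static void poll_reschedule(struct mode_config *cfg)
    {
        if (cfg->poll_running)  /* only once polling is already enabled */
            reschedule_output_poll_work(cfg);
    }

    int main(void)
    {
        struct mode_config cfg = { .delayed_event = 1, .poll_running = 1 };

        poll_reschedule(&cfg);
        return 0;
    }
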
index 0c2be83..e592c5d 100644 (file)
@@ -353,10 +353,10 @@ EXPORT_SYMBOL(drm_syncobj_replace_fence);
  */
 static int drm_syncobj_assign_null_handle(struct drm_syncobj *syncobj)
 {
-       struct dma_fence *fence = dma_fence_allocate_private_stub();
+       struct dma_fence *fence = dma_fence_allocate_private_stub(ktime_get());
 
-       if (IS_ERR(fence))
-               return PTR_ERR(fence);
+       if (!fence)
+               return -ENOMEM;
 
        drm_syncobj_replace_fence(syncobj, fence);
        dma_fence_put(fence);
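
The calling convention changes here: the stub-fence allocator now returns NULL on failure (mapped to -ENOMEM) rather than an ERR_PTR, and takes an explicit timestamp. A standalone sketch of that convention; struct fence and the helper are stand-ins, not the dma-fence API:

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    struct fence { long timestamp; };

    static struct fence *allocate_private_stub(long now)
    {
        struct fence *f = malloc(sizeof(*f));

        if (!f)
            return NULL;        /* NULL, not an encoded error pointer */
        f->timestamp = now;     /* the stub is already signaled at 'now' */
        return f;
    }

    int main(void)
    {
        struct fence *f = allocate_private_stub((long)time(NULL));

        if (!f)
            return 1;           /* the caller maps this to -ENOMEM */
        printf("stub fence @ %ld\n", f->timestamp);
        free(f);
        return 0;
    }
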
index fdf6558..226310c 100644 (file)
@@ -215,10 +215,6 @@ void exynos_drm_fbdev_setup(struct drm_device *dev)
        if (ret)
                goto err_drm_client_init;
 
-       ret = exynos_drm_fbdev_client_hotplug(&fb_helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&fb_helper->client);
 
        return;
index 955cbe9..0544265 100644 (file)
@@ -328,10 +328,6 @@ void psb_fbdev_setup(struct drm_psb_private *dev_priv)
                goto err_drm_fb_helper_unprepare;
        }
 
-       ret = psb_fbdev_client_hotplug(&fb_helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&fb_helper->client);
 
        return;
index 789dce9..dcbda9b 100644 (file)
@@ -23,6 +23,11 @@ subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
 subdir-ccflags-y += $(call cc-disable-warning, frame-address)
 subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
 
+# Fine-grained warning disables
+CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init)
+CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init)
+CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+
 subdir-ccflags-y += -I$(srctree)/$(src)
 
 # Please keep these build lists sorted!
index d853360..16603d5 100644 (file)
@@ -4564,7 +4564,6 @@ copy_bigjoiner_crtc_state_modeset(struct intel_atomic_state *state,
        saved_state->uapi = slave_crtc_state->uapi;
        saved_state->scaler_state = slave_crtc_state->scaler_state;
        saved_state->shared_dpll = slave_crtc_state->shared_dpll;
-       saved_state->dpll_hw_state = slave_crtc_state->dpll_hw_state;
        saved_state->crc_enabled = slave_crtc_state->crc_enabled;
 
        intel_crtc_free_hw_state(slave_crtc_state);
index 3fd30e7..b0c6a2a 100644 (file)
@@ -16,9 +16,6 @@
 #include "intel_display_reg_defs.h"
 #include "intel_fbc.h"
 
-__diag_push();
-__diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
-
 static const struct intel_display_device_info no_display = {};
 
 #define PIPE_A_OFFSET          0x70000
@@ -665,12 +662,24 @@ static const struct intel_display_device_info xe_lpdp_display = {
                BIT(TRANSCODER_C) | BIT(TRANSCODER_D),
 };
 
-__diag_pop();
+/*
+ * Separate detection for no display cases to keep the display id array simple.
+ *
+ * IVB Q requires subvendor and subdevice matching to differentiate from IVB D
+ * GT2 server.
+ */
+static bool has_no_display(struct pci_dev *pdev)
+{
+       static const struct pci_device_id ids[] = {
+               INTEL_IVB_Q_IDS(0),
+               {}
+       };
+
+       return pci_match_id(ids, pdev);
+}
 
 #undef INTEL_VGA_DEVICE
-#undef INTEL_QUANTA_VGA_DEVICE
 #define INTEL_VGA_DEVICE(id, info) { id, info }
-#define INTEL_QUANTA_VGA_DEVICE(info) { 0x16a, info }
 
 static const struct {
        u32 devid;
@@ -695,7 +704,6 @@ static const struct {
        INTEL_IRONLAKE_M_IDS(&ilk_m_display),
        INTEL_SNB_D_IDS(&snb_display),
        INTEL_SNB_M_IDS(&snb_display),
-       INTEL_IVB_Q_IDS(NULL),          /* must be first IVB in list */
        INTEL_IVB_M_IDS(&ivb_display),
        INTEL_IVB_D_IDS(&ivb_display),
        INTEL_HSW_IDS(&hsw_display),
@@ -780,6 +788,11 @@ intel_display_device_probe(struct drm_i915_private *i915, bool has_gmdid,
        if (has_gmdid)
                return probe_gmdid_display(i915, gmdid_ver, gmdid_rel, gmdid_step);
 
+       if (has_no_display(pdev)) {
+               drm_dbg_kms(&i915->drm, "Device doesn't have display\n");
+               return &no_display;
+       }
+
        for (i = 0; i < ARRAY_SIZE(intel_display_ids); i++) {
                if (intel_display_ids[i].devid == pdev->device)
                        return intel_display_ids[i].info;
index 7c5fddb..fbfd8f9 100644 (file)
@@ -166,6 +166,8 @@ struct i915_vma *intel_dpt_pin(struct i915_address_space *vm)
                i915_vma_get(vma);
        }
 
+       dpt->obj->mm.dirty = true;
+
        atomic_dec(&i915->gpu_error.pending_fb_pin);
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
 
@@ -261,7 +263,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
                dpt_obj = i915_gem_object_create_stolen(i915, size);
        if (IS_ERR(dpt_obj) && !HAS_LMEM(i915)) {
                drm_dbg_kms(&i915->drm, "Allocating dpt from smem\n");
-               dpt_obj = i915_gem_object_create_internal(i915, size);
+               dpt_obj = i915_gem_object_create_shmem(i915, size);
        }
        if (IS_ERR(dpt_obj))
                return ERR_CAST(dpt_obj);
index 1cc0ddc..80c3f88 100644 (file)
@@ -135,9 +135,6 @@ static int intel_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma)
        return i915_gem_fb_mmap(obj, vma);
 }
 
-__diag_push();
-__diag_ignore_all("-Woverride-init", "Allow overriding the default ops");
-
 static const struct fb_ops intelfb_ops = {
        .owner = THIS_MODULE,
        __FB_DEFAULT_DEFERRED_OPS_RDWR(intel_fbdev),
@@ -149,8 +146,6 @@ static const struct fb_ops intelfb_ops = {
        .fb_mmap = intel_fbdev_mmap,
 };
 
-__diag_pop();
-
 static int intelfb_alloc(struct drm_fb_helper *helper,
                         struct drm_fb_helper_surface_size *sizes)
 {
index 1160fa2..5eac703 100644 (file)
@@ -211,7 +211,7 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
 
        /* Enable polling and queue hotplug re-enabling. */
        if (hpd_disabled) {
-               drm_kms_helper_poll_enable(&dev_priv->drm);
+               drm_kms_helper_poll_reschedule(&dev_priv->drm);
                mod_delayed_work(dev_priv->unordered_wq,
                                 &dev_priv->display.hotplug.reenable_work,
                                 msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
@@ -649,7 +649,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work)
        drm_connector_list_iter_end(&conn_iter);
 
        if (enabled)
-               drm_kms_helper_poll_enable(&dev_priv->drm);
+               drm_kms_helper_poll_reschedule(&dev_priv->drm);
 
        mutex_unlock(&dev_priv->drm.mode_config.mutex);
 
index 21f9212..67e3aaf 100644 (file)
@@ -2752,7 +2752,7 @@ static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void)
        __drm_atomic_helper_connector_reset(&sdvo_connector->base.base,
                                            &conn_state->base.base);
 
-       INIT_LIST_HEAD(&sdvo_connector->base.panel.fixed_modes);
+       intel_panel_init_alloc(&sdvo_connector->base);
 
        return sdvo_connector;
 }
index df6c9a8..6b9f6cf 100644 (file)
@@ -1246,8 +1246,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
         * times in succession a possibility by enlarging the permutation array.
         */
        order = i915_random_order(count * count, &prng);
-       if (!order)
-               return -ENOMEM;
+       if (!order) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
        max = div_u64(max - size, max_page_size);
index 23857cc..2702ad4 100644 (file)
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
        return MI_ARB_CHECK | 1 << 8 | state;
 }
 
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
 {
-       u32 gsi_offset = gt->uncore->gsi_offset;
+       switch (engine->id) {
+       case RCS0:
+               return GEN12_CCS_AUX_INV;
+       case BCS0:
+               return GEN12_BCS0_AUX_INV;
+       case VCS0:
+               return GEN12_VD0_AUX_INV;
+       case VCS2:
+               return GEN12_VD2_AUX_INV;
+       case VECS0:
+               return GEN12_VE0_AUX_INV;
+       case CCS0:
+               return GEN12_CCS0_AUX_INV;
+       default:
+               return INVALID_MMIO_REG;
+       }
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
+{
+       i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+       if (IS_PONTEVECCHIO(engine->i915))
+               return false;
+
+       /*
+        * So far, platforms supported by i915 that have flat CCS do not
+        * require AUX invalidation. Also check whether the engine requires it.
+        */
+       return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+       i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+       u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+       if (!gen12_needs_ccs_aux_inv(engine))
+               return cs;
 
        *cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
        *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
        *cs++ = AUX_INV;
-       *cs++ = MI_NOOP;
+
+       *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+               MI_SEMAPHORE_REGISTER_POLL |
+               MI_SEMAPHORE_POLL |
+               MI_SEMAPHORE_SAD_EQ_SDD;
+       *cs++ = 0;
+       *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+       *cs++ = 0;
+       *cs++ = 0;
 
        return cs;
 }
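
The dispatch above keys the AUX invalidation register off the engine id and skips the invalidation entirely for unmapped engines or flat-CCS platforms. A standalone sketch using the register offsets visible later in this series; the enum and helpers are illustrative, not the i915 types:

    #include <stdint.h>
    #include <stdio.h>

    enum engine_id { RCS0, BCS0, VCS0, OTHER };

    static uint32_t aux_inv_reg(enum engine_id id)
    {
        switch (id) {
        case RCS0: return 0x4208;   /* GEN12_CCS_AUX_INV */
        case BCS0: return 0x4248;   /* GEN12_BCS0_AUX_INV */
        case VCS0: return 0x4218;   /* GEN12_VD0_AUX_INV */
        default:   return 0;        /* invalid: no AUX INV register */
        }
    }

    static int needs_ccs_aux_inv(enum engine_id id, int has_flat_ccs)
    {
        return aux_inv_reg(id) != 0 && !has_flat_ccs;
    }

    int main(void)
    {
        printf("RCS0:  %d\n", needs_ccs_aux_inv(RCS0, 0));
        printf("OTHER: %d\n", needs_ccs_aux_inv(OTHER, 0));
        printf("RCS0 on flat-CCS: %d\n", needs_ccs_aux_inv(RCS0, 1));
        return 0;
    }
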
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 {
        struct intel_engine_cs *engine = rq->engine;
 
-       if (mode & EMIT_FLUSH) {
-               u32 flags = 0;
+       /*
+        * On Aux CCS platforms, invalidating the Aux table
+        * requires quiescing memory traffic beforehand.
+        */
+       if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+               u32 bit_group_0 = 0;
+               u32 bit_group_1 = 0;
                int err;
                u32 *cs;
 
@@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
                if (err)
                        return err;
 
-               flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_FLUSH_L3;
-               flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
-               flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+               bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+               /*
+                * When required, on MTL and beyond we need to
+                * set the CCS_FLUSH bit in the pipe control.
+                */
+               if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+                       bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+               bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+               bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+               bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+               bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
                /* Wa_1409600907:tgl,adl-p */
-               flags |= PIPE_CONTROL_DEPTH_STALL;
-               flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
-               flags |= PIPE_CONTROL_FLUSH_ENABLE;
+               bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+               bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+               bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
 
-               flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-               flags |= PIPE_CONTROL_QW_WRITE;
+               bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+               bit_group_1 |= PIPE_CONTROL_QW_WRITE;
 
-               flags |= PIPE_CONTROL_CS_STALL;
+               bit_group_1 |= PIPE_CONTROL_CS_STALL;
 
                if (!HAS_3D_PIPELINE(engine->i915))
-                       flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+                       bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
                else if (engine->class == COMPUTE_CLASS)
-                       flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+                       bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
                cs = intel_ring_begin(rq, 6);
                if (IS_ERR(cs))
                        return PTR_ERR(cs);
 
-               cs = gen12_emit_pipe_control(cs,
-                                            PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-                                            flags, LRC_PPHWSP_SCRATCH_ADDR);
+               cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
+                                            LRC_PPHWSP_SCRATCH_ADDR);
                intel_ring_advance(rq, cs);
        }
 
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
                else if (engine->class == COMPUTE_CLASS)
                        flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
 
-               if (!HAS_FLAT_CCS(rq->engine->i915))
-                       count = 8 + 4;
-               else
-                       count = 8;
+               count = 8;
+               if (gen12_needs_ccs_aux_inv(rq->engine))
+                       count += 8;
 
                cs = intel_ring_begin(rq, count);
                if (IS_ERR(cs))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
                cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
 
-               if (!HAS_FLAT_CCS(rq->engine->i915)) {
-                       /* hsdes: 1809175790 */
-                       cs = gen12_emit_aux_table_inv(rq->engine->gt,
-                                                     cs, GEN12_GFX_CCS_AUX_NV);
-               }
+               cs = gen12_emit_aux_table_inv(engine, cs);
 
                *cs++ = preparser_disable(false);
                intel_ring_advance(rq, cs);
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
 
 int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
 {
-       intel_engine_mask_t aux_inv = 0;
-       u32 cmd, *cs;
+       u32 cmd = 4;
+       u32 *cs;
 
-       cmd = 4;
        if (mode & EMIT_INVALIDATE) {
                cmd += 2;
 
-               if (!HAS_FLAT_CCS(rq->engine->i915) &&
-                   (rq->engine->class == VIDEO_DECODE_CLASS ||
-                    rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
-                       aux_inv = rq->engine->mask &
-                               ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
-                       if (aux_inv)
-                               cmd += 4;
-               }
+               if (gen12_needs_ccs_aux_inv(rq->engine))
+                       cmd += 8;
        }
 
        cs = intel_ring_begin(rq, cmd);
@@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
                cmd |= MI_INVALIDATE_TLB;
                if (rq->engine->class == VIDEO_DECODE_CLASS)
                        cmd |= MI_INVALIDATE_BSD;
+
+               if (gen12_needs_ccs_aux_inv(rq->engine) &&
+                   rq->engine->class == COPY_ENGINE_CLASS)
+                       cmd |= MI_FLUSH_DW_CCS;
        }
 
        *cs++ = cmd;
@@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
        *cs++ = 0; /* upper addr */
        *cs++ = 0; /* value */
 
-       if (aux_inv) { /* hsdes: 1809175790 */
-               if (rq->engine->class == VIDEO_DECODE_CLASS)
-                       cs = gen12_emit_aux_table_inv(rq->engine->gt,
-                                                     cs, GEN12_VD0_AUX_NV);
-               else
-                       cs = gen12_emit_aux_table_inv(rq->engine->gt,
-                                                     cs, GEN12_VE0_AUX_NV);
-       }
+       cs = gen12_emit_aux_table_inv(rq->engine, cs);
 
        if (mode & EMIT_INVALIDATE)
                *cs++ = preparser_disable(false);
index 655e5c0..867ba69 100644 (file)
@@ -13,6 +13,7 @@
 #include "intel_gt_regs.h"
 #include "intel_gpu_commands.h"
 
+struct intel_engine_cs;
 struct intel_gt;
 struct i915_request;
 
@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
 
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
 
 static inline u32 *
-__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
+                        u32 bit_group_1, u32 offset)
 {
        memset(batch, 0, 6 * sizeof(u32));
 
-       batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
-       batch[1] = flags1;
+       batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+       batch[1] = bit_group_1;
        batch[2] = offset;
 
        return batch + 6;
 }
 
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *gen8_emit_pipe_control(u32 *batch,
+                                         u32 bit_group_1, u32 offset)
 {
-       return __gen8_emit_pipe_control(batch, 0, flags, offset);
+       return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
 }
 
-static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
+                                          u32 bit_group_1, u32 offset)
 {
-       return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+       return __gen8_emit_pipe_control(batch, bit_group_0,
+                                       bit_group_1, offset);
 }
 
 static inline u32 *
index f948d33..c8568e5 100644 (file)
@@ -37,9 +37,6 @@ static u64 gen8_pte_encode(dma_addr_t addr,
        if (unlikely(flags & PTE_READ_ONLY))
                pte &= ~GEN8_PAGE_RW;
 
-       if (flags & PTE_LM)
-               pte |= GEN12_PPGTT_PTE_LM;
-
        /*
         * For pre-gen12 platforms pat_index is the same as enum
         * i915_cache_level, so the switch-case here is still valid.
index 5d143e2..2bd8d98 100644 (file)
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT      MI_INSTR(0x1c, 2) /* GEN8+ */
 #define MI_SEMAPHORE_WAIT_TOKEN        MI_INSTR(0x1c, 3) /* GEN12+ */
+#define   MI_SEMAPHORE_REGISTER_POLL   (1 << 16)
 #define   MI_SEMAPHORE_POLL            (1 << 15)
 #define   MI_SEMAPHORE_SAD_GT_SDD      (0 << 12)
 #define   MI_SEMAPHORE_SAD_GTE_SDD     (1 << 12)
 #define   PIPE_CONTROL_QW_WRITE                                (1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL                     (1<<13)
+#define   PIPE_CONTROL_CCS_FLUSH                       (1<<13) /* MTL+ */
 #define   PIPE_CONTROL_WRITE_FLUSH                     (1<<12)
 #define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH       (1<<12) /* gen6+ */
 #define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE    (1<<11) /* MBZ on ILK */
index 718cb2c..2cdfb2f 100644 (file)
 #define GEN8_PRIVATE_PAT_HI                    _MMIO(0x40e0 + 4)
 #define GEN10_PAT_INDEX(index)                 _MMIO(0x40e0 + (index) * 4)
 #define BSD_HWS_PGA_GEN7                       _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV                   _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV                       _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV                       _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV                      _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV                      _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV                      _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV                     _MMIO(0x4248)
 
 #define GEN8_RTCR                              _MMIO(0x4260)
 #define GEN8_M1TCR                             _MMIO(0x4264)
 #define GEN8_BTCR                              _MMIO(0x426c)
 #define GEN8_VTCR                              _MMIO(0x4270)
 
-#define GEN12_VD2_AUX_NV                       _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV                       _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV                       _MMIO(0x4238)
-
 #define BLT_HWS_PGA_GEN7                       _MMIO(0x4280)
 
-#define GEN12_VE1_AUX_NV                       _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV                      _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV                     _MMIO(0x42c8)
 #define   AUX_INV                              REG_BIT(0)
+
 #define VEBOX_HWS_PGA_GEN7                     _MMIO(0x4380)
 
 #define GEN12_AUX_ERR_DBG                      _MMIO(0x43f4)
index 2f6a9be..731d9f2 100644 (file)
@@ -670,7 +670,7 @@ __vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size)
        if (IS_ERR(obj))
                return ERR_CAST(obj);
 
-       i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED);
+       i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
 
        vma = i915_vma_instance(obj, vm, NULL);
        if (IS_ERR(vma)) {
index a4ec20a..9477c24 100644 (file)
@@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
            IS_DG2_G11(ce->engine->i915))
                cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
 
-       /* hsdes: 1809175790 */
-       if (!HAS_FLAT_CCS(ce->engine->i915))
-               cs = gen12_emit_aux_table_inv(ce->engine->gt,
-                                             cs, GEN12_GFX_CCS_AUX_NV);
+       cs = gen12_emit_aux_table_inv(ce->engine, cs);
 
        /* Wa_16014892111 */
        if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
@@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
                                                    PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
                                                    0);
 
-       /* hsdes: 1809175790 */
-       if (!HAS_FLAT_CCS(ce->engine->i915)) {
-               if (ce->engine->class == VIDEO_DECODE_CLASS)
-                       cs = gen12_emit_aux_table_inv(ce->engine->gt,
-                                                     cs, GEN12_VD0_AUX_NV);
-               else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
-                       cs = gen12_emit_aux_table_inv(ce->engine->gt,
-                                                     cs, GEN12_VE0_AUX_NV);
-       }
-
-       return cs;
+       return gen12_emit_aux_table_inv(ce->engine, cs);
 }
 
 static void
index ee9f83a..477df26 100644 (file)
@@ -470,12 +470,19 @@ int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)
        ret = slpc_set_param(slpc,
                             SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
                             val);
-       if (ret)
+       if (ret) {
                guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n",
                                val, ERR_PTR(ret));
-       else
+       } else {
                slpc->ignore_eff_freq = val;
 
+               /* Set min to RPn when we disable efficient freq */
+               if (val)
+                       ret = slpc_set_param(slpc,
+                                            SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+                                            slpc->min_freq);
+       }
+
        intel_runtime_pm_put(&i915->runtime_pm, wakeref);
        mutex_unlock(&slpc->lock);
        return ret;
@@ -602,9 +609,8 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
                return ret;
 
        if (!slpc->min_freq_softlimit) {
-               ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit);
-               if (unlikely(ret))
-                       return ret;
+               /* Min softlimit is initialized to RPn */
+               slpc->min_freq_softlimit = slpc->min_freq;
                slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
        } else {
                return intel_guc_slpc_set_min_freq(slpc,
@@ -755,6 +761,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
                return ret;
        }
 
+       /* Set cached value of ignore efficient freq */
+       intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
        /* Revert SLPC min/max to softlimits if necessary */
        ret = slpc_set_softlimits(slpc);
        if (unlikely(ret)) {
@@ -765,9 +774,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
        /* Set cached media freq ratio mode */
        intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
 
-       /* Set cached value of ignore efficient freq */
-       intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
        return 0;
 }
 
index ddd1462..fa70def 100644 (file)
@@ -26,6 +26,7 @@
  * The kernel driver is only responsible for loading the HuC firmware and
  * triggering its security authentication. This is done differently depending
  * on the platform:
+ *
  * - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
  *   and the authentication via GuC
  * - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
  *   not-DG2 older platforms), while the authentication is done in 2-steps,
  *   a first auth for clear-media workloads via GuC and a second one for all
  *   workloads via GSC.
+ *
  * On platforms where the GuC does the authentication, to correctly do so the
  * HuC binary must be loaded before the GuC one.
  * Loading the HuC is optional; however, not using the HuC might negatively
index 2a0438f..af9afdb 100644 (file)
@@ -491,7 +491,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
                return;
        }
 
-       msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, reg);
+       msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, value);
 
        // check the msg in DATA register.
        msg = vgpu_vreg(vgpu, offset + 4);
index 8ef9388..5ec2930 100644 (file)
@@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
                }
        } while (unlikely(is_barrier(active)));
 
-       if (!__i915_active_fence_set(active, fence))
+       fence = __i915_active_fence_set(active, fence);
+       if (!fence)
                __i915_active_acquire(ref);
+       else
+               dma_fence_put(fence);
 
 out:
        i915_active_release(ref);
@@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_active *ref,
                return NULL;
        }
 
-       rcu_read_lock();
        prev = __i915_active_fence_set(active, fence);
-       if (prev)
-               prev = dma_fence_get_rcu(prev);
-       else
+       if (!prev)
                __i915_active_acquire(ref);
-       rcu_read_unlock();
 
        return prev;
 }
@@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
  *
  * Records the new @fence as the last active fence along its timeline in
  * this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
  */
 struct dma_fence *
 __i915_active_fence_set(struct i915_active_fence *active,
@@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
        struct dma_fence *prev;
        unsigned long flags;
 
-       if (fence == rcu_access_pointer(active->fence))
+       /*
+        * In case of fences embedded in i915_requests, their memory is
+        * SLAB_TYPESAFE_BY_RCU, so it can be reused right after release
+        * by new requests.  Then, there is a risk of passing back a pointer
+        * to a new, completely unrelated fence that reuses the same memory
+        * while tracked under a different active tracker.  Combined with i915
+        * perf open/close operations that build await dependencies between
+        * engine kernel context requests and user requests from different
+        * timelines, this can lead to dependency loops and infinite waits.
+        *
+        * As a countermeasure, we try to get a reference to the active->fence
+        * first, so if we succeed and pass it back to our user then it is not
+        * released and potentially reused by an unrelated request before the
+        * user has a chance to set up an await dependency on it.
+        */
+       prev = i915_active_fence_get(active);
+       if (fence == prev)
                return fence;
 
        GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
@@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
         * Consider that we have two threads arriving (A and B), with
         * C already resident as the active->fence.
         *
-        * A does the xchg first, and so it sees C or NULL depending
-        * on the timing of the interrupt handler. If it is NULL, the
-        * previous fence must have been signaled and we know that
-        * we are first on the timeline. If it is still present,
-        * we acquire the lock on that fence and serialise with the interrupt
-        * handler, in the process removing it from any future interrupt
-        * callback. A will then wait on C before executing (if present).
-        *
-        * As B is second, it sees A as the previous fence and so waits for
-        * it to complete its transition and takes over the occupancy for
-        * itself -- remembering that it needs to wait on A before executing.
+        * Both A and B have got a reference to C or NULL, depending on the
+        * timing of the interrupt handler.  Let's assume that if A has got C
+        * then it has locked C first (before B).
         *
         * Note the strong ordering of the timeline also provides consistent
         * nesting rules for the fence->lock; the inner lock is always the
         * older lock.
         */
        spin_lock_irqsave(fence->lock, flags);
-       prev = xchg(__active_fence_slot(active), fence);
-       if (prev) {
-               GEM_BUG_ON(prev == fence);
+       if (prev)
                spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+       /*
+        * A does the cmpxchg first, and so it sees C or NULL, as before, or
+        * something else, depending on the timing of other threads and/or
+        * the interrupt handler.  If it is not the same as before, A unlocks
+        * C if applicable and retries, starting from an attempt to get a new
+        * active->fence.  Meanwhile, B follows the same path as A.
+        * Once A succeeds with cmpxchg, B fails again, retries, gets A from
+        * active->fence, locks it as soon as A completes, and possibly
+        * succeeds with cmpxchg.
+        */
+       while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+               if (prev) {
+                       spin_unlock(prev->lock);
+                       dma_fence_put(prev);
+               }
+               spin_unlock_irqrestore(fence->lock, flags);
+
+               prev = i915_active_fence_get(active);
+               GEM_BUG_ON(prev == fence);
+
+               spin_lock_irqsave(fence->lock, flags);
+               if (prev)
+                       spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+       }
+
+       /*
+        * If prev is NULL then the previous fence must have been signaled
+        * and we know that we are first on the timeline.  If it is still
+        * present then, having the lock on that fence already acquired, we
+        * serialise with the interrupt handler, in the process of removing it
+        * from any future interrupt callback.  A will then wait on C before
+        * executing (if present).
+        *
+        * As B is second, it sees A as the previous fence and so waits for
+        * it to complete its transition and takes over the occupancy for
+        * itself -- remembering that it needs to wait on A before executing.
+        */
+       if (prev) {
                __list_del_entry(&active->cb.node);
                spin_unlock(prev->lock); /* serialise with prev->cb_list */
        }
@@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
        int err = 0;
 
        /* Must maintain timeline ordering wrt previous active requests */
-       rcu_read_lock();
        fence = __i915_active_fence_set(active, &rq->fence);
-       if (fence) /* but the previous fence may not belong to that timeline! */
-               fence = dma_fence_get_rcu(fence);
-       rcu_read_unlock();
        if (fence) {
                err = i915_request_await_dma_fence(rq, fence);
                dma_fence_put(fence);
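
The retry loop above is easier to see in isolation. Below is a minimal,
self-contained C11 sketch of the same pattern: publish a new entry in a
shared slot and hand the previous occupant back with a reference held,
re-sampling whenever the slot changes underneath us. The obj, obj_get()
and obj_put() names are hypothetical stand-ins, not the i915 API, and the
sketch deliberately glosses over the object-lifetime problem that
SLAB_TYPESAFE_BY_RCU and the nested fence locks solve in the real code.

    #include <stdatomic.h>
    #include <stdlib.h>

    struct obj { atomic_int refs; };

    static struct obj *obj_get(struct obj *o)
    {
            if (o)
                    atomic_fetch_add(&o->refs, 1);
            return o;
    }

    static void obj_put(struct obj *o)
    {
            if (o && atomic_fetch_sub(&o->refs, 1) == 1)
                    free(o);
    }

    /*
     * Publish @new in *slot and return the previous occupant with a
     * reference held.  If another thread swaps the slot between our
     * read and the compare-and-swap, drop the stale reference and
     * retry from a fresh read -- the shape of the cmpxchg loop in
     * __i915_active_fence_set().  Note that the slot itself holds no
     * reference, just like active->fence.
     */
    static struct obj *publish(_Atomic(struct obj *) *slot, struct obj *new)
    {
            for (;;) {
                    struct obj *prev = obj_get(atomic_load(slot));
                    struct obj *expected = prev;

                    if (atomic_compare_exchange_strong(slot, &expected, new))
                            return prev;    /* caller must obj_put() it */

                    obj_put(prev);          /* lost the race; try again */
            }
    }
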
index 0ad0c58..7d8671f 100644 (file)
@@ -443,7 +443,6 @@ static int i915_pcode_init(struct drm_i915_private *i915)
 static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 {
        struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-       struct pci_dev *root_pdev;
        int ret;
 
        if (i915_inject_probe_failure(dev_priv))
@@ -557,15 +556,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
 
        intel_bw_init_hw(dev_priv);
 
-       /*
-        * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
-        * This should be totally removed when we handle the pci states properly
-        * on runtime PM and on s2idle cases.
-        */
-       root_pdev = pcie_find_root_port(pdev);
-       if (root_pdev)
-               pci_d3cold_disable(root_pdev);
-
        return 0;
 
 err_opregion:
@@ -591,7 +581,6 @@ err_perf:
 static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 {
        struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
-       struct pci_dev *root_pdev;
 
        i915_perf_fini(dev_priv);
 
@@ -599,10 +588,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
 
        if (pdev->msi_enabled)
                pci_disable_msi(pdev);
-
-       root_pdev = pcie_find_root_port(pdev);
-       if (root_pdev)
-               pci_d3cold_enable(root_pdev);
 }
 
 /**
@@ -1517,6 +1502,8 @@ static int intel_runtime_suspend(struct device *kdev)
 {
        struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
        struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+       struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+       struct pci_dev *root_pdev;
        struct intel_gt *gt;
        int ret, i;
 
@@ -1568,6 +1555,15 @@ static int intel_runtime_suspend(struct device *kdev)
                drm_err(&dev_priv->drm,
                        "Unclaimed access detected prior to suspending\n");
 
+       /*
+        * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
+        * This should be totally removed when we handle the pci states properly
+        * on runtime PM.
+        */
+       root_pdev = pcie_find_root_port(pdev);
+       if (root_pdev)
+               pci_d3cold_disable(root_pdev);
+
        rpm->suspended = true;
 
        /*
@@ -1606,6 +1602,8 @@ static int intel_runtime_resume(struct device *kdev)
 {
        struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
        struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+       struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+       struct pci_dev *root_pdev;
        struct intel_gt *gt;
        int ret, i;
 
@@ -1619,6 +1617,11 @@ static int intel_runtime_resume(struct device *kdev)
 
        intel_opregion_notify_adapter(dev_priv, PCI_D0);
        rpm->suspended = false;
+
+       root_pdev = pcie_find_root_port(pdev);
+       if (root_pdev)
+               pci_d3cold_enable(root_pdev);
+
        if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
                drm_dbg(&dev_priv->drm,
                        "Unclaimed access during suspend, bios?\n");
index 3d7a5db..928975d 100644 (file)
@@ -38,9 +38,6 @@
 #include "i915_reg.h"
 #include "intel_pci_config.h"
 
-__diag_push();
-__diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
-
 #define PLATFORM(x) .platform = (x)
 #define GEN(x) \
        .__runtime.graphics.ip.ver = (x), \
@@ -846,8 +843,6 @@ static const struct intel_device_info mtl_info = {
 
 #undef PLATFORM
 
-__diag_pop();
-
 /*
  * Make sure any device matches here are from most specific to most
  * general.  For example, since the Quanta match is based on the subsystem
index 0a111b2..49c6f1f 100644 (file)
@@ -868,8 +868,17 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
                        oa_report_id_clear(stream, report32);
                        oa_timestamp_clear(stream, report32);
                } else {
+                       u8 *oa_buf_end = stream->oa_buffer.vaddr +
+                                        OA_BUFFER_SIZE;
+                       u32 part = oa_buf_end - (u8 *)report32;
+
                        /* Zero out the entire report */
-                       memset(report32, 0, report_size);
+                       if (report_size <= part) {
+                               memset(report32, 0, report_size);
+                       } else {
+                               memset(report32, 0, part);
+                               memset(oa_buf_base, 0, report_size - part);
+                       }
                }
        }
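
The split memset above handles an OA report that straddles the end of the
ring buffer. The same arithmetic in standalone form (plain C with
hypothetical names; a sketch, not the driver code):

    #include <stdint.h>
    #include <string.h>

    /*
     * Zero @len bytes starting at offset @pos of a circular buffer of
     * @size bytes.  When the span crosses the end of the buffer, clear
     * the tail first, then the wrapped-around part at the base.
     */
    static void ring_clear(uint8_t *base, size_t size, size_t pos, size_t len)
    {
            size_t tail = size - pos;       /* room before the wrap point */

            if (len <= tail) {
                    memset(base + pos, 0, len);
            } else {
                    memset(base + pos, 0, tail);
                    memset(base, 0, len - tail);
            }
    }
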
 
@@ -4422,6 +4431,7 @@ static const struct i915_range mtl_oam_b_counters[] = {
 static const struct i915_range xehp_oa_b_counters[] = {
        { .start = 0xdc48, .end = 0xdc48 },     /* OAA_ENABLE_REG */
        { .start = 0xdd00, .end = 0xdd48 },     /* OAG_LCE0_0 - OAA_LENABLE_REG */
+       {}
 };
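
The added {} is a zero-filled sentinel: code that walks these i915_range
tables stops at the first all-zero entry instead of carrying a separate
length, so a missing terminator sends the walker off the end of the array.
A minimal sketch of the convention (hypothetical table and helper, not the
i915 walker itself):

    struct range { unsigned int start, end; };

    static const struct range ranges[] = {
            { .start = 0xdc48, .end = 0xdc48 },
            { .start = 0xdd00, .end = 0xdd48 },
            {}      /* sentinel: terminates the walk below */
    };

    static int reg_in_ranges(unsigned int reg)
    {
            const struct range *r;

            for (r = ranges; r->start || r->end; r++)
                    if (reg >= r->start && reg <= r->end)
                            return 1;
            return 0;
    }
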
 
 static const struct i915_range gen7_oa_mux_regs[] = {
index 894068b..833b73e 100644 (file)
@@ -1661,6 +1661,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
 
        request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
 
+       /*
+        * Callers must put the reference that __i915_active_fence_set()
+        * may have taken on the returned request once it is no longer
+        * needed.
+        */
        return to_request(__i915_active_fence_set(&timeline->last_request,
                                                  &rq->fence));
 }
@@ -1707,6 +1712,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
                                                         0);
        }
 
+       /*
+        * Callers must put the reference to prev that
+        * __i915_active_fence_set() may have taken once it is no
+        * longer needed.
+        */
        return prev;
 }
 
@@ -1760,6 +1769,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
                prev = __i915_request_ensure_ordering(rq, timeline);
        else
                prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+       if (prev)
+               i915_request_put(prev);
 
        /*
         * Make sure that no request gazumped us - if it was allocated after
index 5f26090..89585b3 100644 (file)
@@ -310,7 +310,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
                dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n",
                         sig_cfg.mode.hactive, new_hactive);
 
-               sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive;
+               sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive;
                sig_cfg.mode.hactive = new_hactive;
        }
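
A quick worked example (hypothetical numbers) of why the assignment became
a subtraction: take hactive = 1366 and hfront_porch = 70, and suppose the
8-pixel alignment yields new_hactive = 1360. Then

    hfront_porch -= new_hactive - hactive;   /* 70 - (1360 - 1366) = 76 */

and 1360 + 76 = 1366 + 70, so htotal is preserved; the old code instead
overwrote the porch with the raw difference and corrupted the timing.
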
 
index a99310b..bbb1bf3 100644 (file)
@@ -89,7 +89,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
                         * since we've already mapped it once in
                         * submit_reloc()
                         */
-                       if (WARN_ON(!ptr))
+                       if (WARN_ON(IS_ERR_OR_NULL(ptr)))
                                return;
 
                        for (i = 0; i < dwords; i++) {
index 790f55e..e788ed7 100644 (file)
@@ -206,7 +206,7 @@ static const struct a6xx_shader_block {
        SHADER(A6XX_SP_LB_3_DATA, 0x800),
        SHADER(A6XX_SP_LB_4_DATA, 0x800),
        SHADER(A6XX_SP_LB_5_DATA, 0x200),
-       SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000),
+       SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x800),
        SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280),
        SHADER(A6XX_SP_UAV_DATA, 0x80),
        SHADER(A6XX_SP_INST_TAG, 0x80),
index cb94cfd..ce8d0b2 100644 (file)
@@ -369,8 +369,6 @@ static const struct adreno_info gpulist[] = {
                .hwcg = a640_hwcg,
        }, {
                .rev = ADRENO_REV(6, 9, 0, ANY_ID),
-               .revn = 690,
-               .name = "A690",
                .fw = {
                        [ADRENO_FW_SQE] = "a660_sqe.fw",
                        [ADRENO_FW_GMU] = "a690_gmu.bin",
index d8c9e8c..8450198 100644 (file)
@@ -149,7 +149,8 @@ bool adreno_cmp_rev(struct adreno_rev rev1, struct adreno_rev rev2);
 
 static inline bool adreno_is_revn(const struct adreno_gpu *gpu, uint32_t revn)
 {
-       WARN_ON_ONCE(!gpu->revn);
+       /* revn can be zero, but if not, it is set at the same time as info */
+       WARN_ON_ONCE(!gpu->info);
 
        return gpu->revn == revn;
 }
@@ -161,14 +162,16 @@ static inline bool adreno_has_gmu_wrapper(const struct adreno_gpu *gpu)
 
 static inline bool adreno_is_a2xx(const struct adreno_gpu *gpu)
 {
-       WARN_ON_ONCE(!gpu->revn);
+       /* revn can be zero, but if not, it is set at the same time as info */
+       WARN_ON_ONCE(!gpu->info);
 
        return (gpu->revn < 300);
 }
 
 static inline bool adreno_is_a20x(const struct adreno_gpu *gpu)
 {
-       WARN_ON_ONCE(!gpu->revn);
+       /* revn can be zero, but if not, it is set at the same time as info */
+       WARN_ON_ONCE(!gpu->info);
 
        return (gpu->revn < 210);
 }
@@ -307,7 +310,8 @@ static inline int adreno_is_a680(const struct adreno_gpu *gpu)
 
 static inline int adreno_is_a690(const struct adreno_gpu *gpu)
 {
-       return adreno_is_revn(gpu, 690);
+       /* The order of args is important here to handle ANY_ID correctly */
+       return adreno_cmp_rev(ADRENO_REV(6, 9, 0, ANY_ID), gpu->rev);
 };
 
 /* check for a615, a616, a618, a619 or any derivatives */
index e379599..29bb8ee 100644 (file)
 #define        DPU_PERF_DEFAULT_MAX_CORE_CLK_RATE      412500000
 
 /**
- * enum dpu_core_perf_data_bus_id - data bus identifier
- * @DPU_CORE_PERF_DATA_BUS_ID_MNOC: DPU/MNOC data bus
- * @DPU_CORE_PERF_DATA_BUS_ID_LLCC: MNOC/LLCC data bus
- * @DPU_CORE_PERF_DATA_BUS_ID_EBI: LLCC/EBI data bus
- */
-enum dpu_core_perf_data_bus_id {
-       DPU_CORE_PERF_DATA_BUS_ID_MNOC,
-       DPU_CORE_PERF_DATA_BUS_ID_LLCC,
-       DPU_CORE_PERF_DATA_BUS_ID_EBI,
-       DPU_CORE_PERF_DATA_BUS_ID_MAX,
-};
-
-/**
  * struct dpu_core_perf_params - definition of performance parameters
  * @max_per_pipe_ib: maximum instantaneous bandwidth request
  * @bw_ctl: arbitrated bandwidth request
index c278fb9..86182c7 100644 (file)
@@ -51,7 +51,7 @@
 
 static const u32 fetch_tbl[SSPP_MAX] = {CTL_INVALID_BIT, 16, 17, 18, 19,
        CTL_INVALID_BIT, CTL_INVALID_BIT, CTL_INVALID_BIT, CTL_INVALID_BIT, 0,
-       1, 2, 3, CTL_INVALID_BIT, CTL_INVALID_BIT};
+       1, 2, 3, 4, 5};
 
 static int _mixer_stages(const struct dpu_lm_cfg *mixer, int count,
                enum dpu_lm lm)
@@ -198,6 +198,12 @@ static void dpu_hw_ctl_update_pending_flush_sspp(struct dpu_hw_ctl *ctx,
        case SSPP_DMA3:
                ctx->pending_flush_mask |= BIT(25);
                break;
+       case SSPP_DMA4:
+               ctx->pending_flush_mask |= BIT(13);
+               break;
+       case SSPP_DMA5:
+               ctx->pending_flush_mask |= BIT(14);
+               break;
        case SSPP_CURSOR0:
                ctx->pending_flush_mask |= BIT(22);
                break;
index 3ce45b0..31deda1 100644 (file)
@@ -1087,8 +1087,6 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs = {
 
 const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs = {
        .has_phy_lane = true,
-       .regulator_data = dsi_phy_14nm_17mA_regulators,
-       .num_regulators = ARRAY_SIZE(dsi_phy_14nm_17mA_regulators),
        .ops = {
                .enable = dsi_14nm_phy_enable,
                .disable = dsi_14nm_phy_disable,
index b933a85..bf1e17d 100644 (file)
@@ -246,10 +246,6 @@ void msm_fbdev_setup(struct drm_device *dev)
                goto err_drm_fb_helper_unprepare;
        }
 
-       ret = msm_fbdev_client_hotplug(&helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&helper->client);
 
        return;
index 96599ec..1a5d4f1 100644 (file)
@@ -191,6 +191,12 @@ msm_fence_init(struct dma_fence *fence, struct msm_fence_context *fctx)
 
        f->fctx = fctx;
 
+       /*
+        * Until this point, the fence was just some pre-allocated memory;
+        * no one should have taken a reference to it yet.
+        */
+       WARN_ON(kref_read(&fence->refcount));
+
        dma_fence_init(&f->base, &msm_fence_ops, &fctx->spinlock,
                       fctx->context, ++fctx->last_fence);
 }
index 3f1aa4d..63c9641 100644 (file)
@@ -86,7 +86,19 @@ void __msm_gem_submit_destroy(struct kref *kref)
        }
 
        dma_fence_put(submit->user_fence);
-       dma_fence_put(submit->hw_fence);
+
+       /*
+        * If the submit is freed before msm_job_run(), then hw_fence is
+        * just some pre-allocated memory, not a reference-counted fence.
+        * Once the job runs and the hw_fence is initialized, it will
+        * have a refcount of at least one, since the submit holds a ref
+        * to the hw_fence.
+        */
+       if (kref_read(&submit->hw_fence->refcount) == 0) {
+               kfree(submit->hw_fence);
+       } else {
+               dma_fence_put(submit->hw_fence);
+       }
 
        put_pid(submit->pid);
        msm_submitqueue_put(submit->queue);
@@ -889,7 +901,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
         * after the job is armed
         */
        if ((args->flags & MSM_SUBMIT_FENCE_SN_IN) &&
-                       idr_find(&queue->fence_idr, args->fence)) {
+                       (!args->fence || idr_find(&queue->fence_idr, args->fence))) {
                spin_unlock(&queue->idr_lock);
                idr_preload_end();
                ret = -EINVAL;
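
The kref_read() test above works because a pre-allocated but never
initialised fence still has a refcount of exactly zero, where calling
dma_fence_put() would underflow the refcount. The same guard in generic
kref form (a sketch with a hypothetical thing type, not the msm code):

    #include <linux/kref.h>
    #include <linux/slab.h>

    struct thing {
            struct kref ref;        /* stays zero until initialised */
    };

    static void thing_release(struct kref *ref)
    {
            kfree(container_of(ref, struct thing, ref));
    }

    /*
     * Teardown that tolerates both states: raw pre-allocated memory
     * (refcount still 0, free it directly) and a live object (drop
     * our reference and let the release callback free it).
     */
    static void thing_destroy(struct thing *t)
    {
            if (kref_read(&t->ref) == 0)
                    kfree(t);
            else
                    kref_put(&t->ref, thing_release);
    }
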
index 05648c9..798bd4f 100644 (file)
@@ -189,6 +189,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss)
 #define UBWC_2_0 0x20000000
 #define UBWC_3_0 0x30000000
 #define UBWC_4_0 0x40000000
+#define UBWC_4_3 0x40030000
 
 static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss)
 {
@@ -227,7 +228,10 @@ static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss)
                writel_relaxed(1, msm_mdss->mmio + UBWC_CTRL_2);
                writel_relaxed(0, msm_mdss->mmio + UBWC_PREDICTION_MODE);
        } else {
-               writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2);
+               if (data->ubwc_dec_version == UBWC_4_3)
+                       writel_relaxed(3, msm_mdss->mmio + UBWC_CTRL_2);
+               else
+                       writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2);
                writel_relaxed(1, msm_mdss->mmio + UBWC_PREDICTION_MODE);
        }
 }
@@ -271,6 +275,7 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
                msm_mdss_setup_ubwc_dec_30(msm_mdss);
                break;
        case UBWC_4_0:
+       case UBWC_4_3:
                msm_mdss_setup_ubwc_dec_40(msm_mdss);
                break;
        default:
@@ -569,6 +574,16 @@ static const struct msm_mdss_data sm8250_data = {
        .macrotile_mode = 1,
 };
 
+static const struct msm_mdss_data sm8550_data = {
+       .ubwc_version = UBWC_4_0,
+       .ubwc_dec_version = UBWC_4_3,
+       .ubwc_swizzle = 6,
+       .ubwc_static = 1,
+       /* TODO: highest_bank_bit = 2 for LP_DDR4 */
+       .highest_bank_bit = 3,
+       .macrotile_mode = 1,
+};
+
 static const struct of_device_id mdss_dt_match[] = {
        { .compatible = "qcom,mdss" },
        { .compatible = "qcom,msm8998-mdss" },
@@ -585,7 +600,7 @@ static const struct of_device_id mdss_dt_match[] = {
        { .compatible = "qcom,sm8250-mdss", .data = &sm8250_data },
        { .compatible = "qcom,sm8350-mdss", .data = &sm8250_data },
        { .compatible = "qcom,sm8450-mdss", .data = &sm8250_data },
-       { .compatible = "qcom,sm8550-mdss", .data = &sm8250_data },
+       { .compatible = "qcom,sm8550-mdss", .data = &sm8550_data },
        {}
 };
 MODULE_DEVICE_TABLE(of, mdss_dt_match);
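
Each compatible string above carries per-SoC data (the new sm8550 entry
points at sm8550_data), which the probe path looks up through the match
table. Roughly, assuming the usual device-tree match-data pattern rather
than this driver's exact probe code:

    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    static int example_probe(struct platform_device *pdev)
    {
            const struct msm_mdss_data *data;

            /* returns the .data of whichever compatible matched */
            data = of_device_get_match_data(&pdev->dev);
            if (!data)
                    return -EINVAL; /* e.g. the bare "qcom,mdss" entry */

            /* data->ubwc_dec_version then selects the UBWC_*_* setup */
            return 0;
    }
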
index 42e1665..1bec819 100644 (file)
@@ -910,15 +910,19 @@ nv50_msto_prepare(struct drm_atomic_state *state,
        struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
        struct nv50_mstc *mstc = msto->mstc;
        struct nv50_mstm *mstm = mstc->mstm;
-       struct drm_dp_mst_atomic_payload *payload;
+       struct drm_dp_mst_topology_state *old_mst_state;
+       struct drm_dp_mst_atomic_payload *payload, *old_payload;
 
        NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
 
+       old_mst_state = drm_atomic_get_old_mst_topology_state(state, mgr);
+
        payload = drm_atomic_get_mst_payload_state(mst_state, mstc->port);
+       old_payload = drm_atomic_get_mst_payload_state(old_mst_state, mstc->port);
 
        // TODO: Figure out if we want to do a better job of handling VCPI allocation failures here?
        if (msto->disabled) {
-               drm_dp_remove_payload(mgr, mst_state, payload, payload);
+               drm_dp_remove_payload(mgr, mst_state, old_payload, payload);
 
                nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
        } else {
@@ -1873,6 +1877,8 @@ nv50_pior_destroy(struct drm_encoder *encoder)
        nvif_outp_dtor(&nv_encoder->outp);
 
        drm_encoder_cleanup(encoder);
+
+       mutex_destroy(&nv_encoder->dp.hpd_irq_lock);
        kfree(encoder);
 }
 
@@ -1917,6 +1923,8 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
        nv_encoder->i2c = ddc;
        nv_encoder->aux = aux;
 
+       mutex_init(&nv_encoder->dp.hpd_irq_lock);
+
        encoder = to_drm_encoder(nv_encoder);
        encoder->possible_crtcs = dcbe->heads;
        encoder->possible_clones = 0;
index 40a1065..ef441df 100644 (file)
@@ -16,7 +16,7 @@ struct nvkm_i2c_bus {
        const struct nvkm_i2c_bus_func *func;
        struct nvkm_i2c_pad *pad;
 #define NVKM_I2C_BUS_CCB(n) /* 'n' is ccb index */                           (n)
-#define NVKM_I2C_BUS_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x100)
+#define NVKM_I2C_BUS_EXT(n) /* 'n' is dcb external encoder type */  ((n) + 0x10)
 #define NVKM_I2C_BUS_PRI /* ccb primary comm. port */                        -1
 #define NVKM_I2C_BUS_SEC /* ccb secondary comm. port */                      -2
        int id;
@@ -38,7 +38,7 @@ struct nvkm_i2c_aux {
        const struct nvkm_i2c_aux_func *func;
        struct nvkm_i2c_pad *pad;
 #define NVKM_I2C_AUX_CCB(n) /* 'n' is ccb index */                           (n)
-#define NVKM_I2C_AUX_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x100)
+#define NVKM_I2C_AUX_EXT(n) /* 'n' is dcb external encoder type */  ((n) + 0x10)
        int id;
 
        struct mutex mutex;
index e648ecd..3dfbc37 100644 (file)
@@ -90,6 +90,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
                if (cli)
                        nouveau_svmm_part(chan->vmm->svmm, chan->inst);
 
+               nvif_object_dtor(&chan->blit);
                nvif_object_dtor(&chan->nvsw);
                nvif_object_dtor(&chan->gart);
                nvif_object_dtor(&chan->vram);
index e06a8ff..bad7466 100644 (file)
@@ -53,6 +53,7 @@ struct nouveau_channel {
        u32 user_put;
 
        struct nvif_object user;
+       struct nvif_object blit;
 
        struct nvif_event kill;
        atomic_t killed;
index f75c6f0..622f6eb 100644 (file)
@@ -967,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
        /* Determine display colour depth for everything except LVDS now,
         * DP requires this before mode_valid() is called.
         */
-       if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
+       if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
                nouveau_connector_detect_depth(connector);
 
        /* Find the native mode if this is a digital panel, if we didn't
@@ -1408,8 +1408,7 @@ nouveau_connector_create(struct drm_device *dev,
                ret = nvif_conn_ctor(&disp->disp, nv_connector->base.name, nv_connector->index,
                                     &nv_connector->conn);
                if (ret) {
-                       kfree(nv_connector);
-                       return ERR_PTR(ret);
+                       goto drm_conn_err;
                }
 
                ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsHotplug",
@@ -1426,8 +1425,7 @@ nouveau_connector_create(struct drm_device *dev,
                        if (ret) {
                                nvif_event_dtor(&nv_connector->hpd);
                                nvif_conn_dtor(&nv_connector->conn);
-                               kfree(nv_connector);
-                               return ERR_PTR(ret);
+                               goto drm_conn_err;
                        }
                }
        }
@@ -1475,4 +1473,9 @@ nouveau_connector_create(struct drm_device *dev,
 
        drm_connector_register(connector);
        return connector;
+
+drm_conn_err:
+       drm_connector_cleanup(connector);
+       kfree(nv_connector);
+       return ERR_PTR(ret);
 }
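
The new drm_conn_err label folds the two duplicated failure paths into the
usual kernel unwind shape: one shared exit sequence that tears down
everything initialised so far. In outline (hypothetical init/cleanup
names, not the nouveau functions):

    static int setup(struct widget *w)
    {
            int ret;

            ret = init_a(w);
            if (ret)
                    return ret;     /* nothing to undo yet */

            ret = init_b(w);
            if (ret)
                    goto err_a;

            ret = init_c(w);
            if (ret)
                    goto err_b;

            return 0;

    err_b:
            cleanup_b(w);           /* unwind in reverse order */
    err_a:
            cleanup_a(w);
            return ret;
    }
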
index 7aac938..40fb9a8 100644 (file)
@@ -375,15 +375,29 @@ nouveau_accel_gr_init(struct nouveau_drm *drm)
                ret = nvif_object_ctor(&drm->channel->user, "drmNvsw",
                                       NVDRM_NVSW, nouveau_abi16_swclass(drm),
                                       NULL, 0, &drm->channel->nvsw);
+
+               if (ret == 0 && device->info.chipset >= 0x11) {
+                       ret = nvif_object_ctor(&drm->channel->user, "drmBlit",
+                                              0x005f, 0x009f,
+                                              NULL, 0, &drm->channel->blit);
+               }
+
                if (ret == 0) {
                        struct nvif_push *push = drm->channel->chan.push;
-                       ret = PUSH_WAIT(push, 2);
-                       if (ret == 0)
+                       ret = PUSH_WAIT(push, 8);
+                       if (ret == 0) {
+                               if (device->info.chipset >= 0x11) {
+                                       PUSH_NVSQ(push, NV05F, 0x0000, drm->channel->blit.handle);
+                                       PUSH_NVSQ(push, NV09F, 0x0120, 0,
+                                                              0x0124, 1,
+                                                              0x0128, 2);
+                               }
                                PUSH_NVSQ(push, NV_SW, 0x0000, drm->channel->nvsw.handle);
+                       }
                }
 
                if (ret) {
-                       NV_ERROR(drm, "failed to allocate sw class, %d\n", ret);
+                       NV_ERROR(drm, "failed to allocate sw or blit class, %d\n", ret);
                        nouveau_accel_gr_fini(drm);
                        return;
                }
index 40c8ea4..b8ac66b 100644 (file)
@@ -26,6 +26,8 @@
 #include "head.h"
 #include "ior.h"
 
+#include <drm/display/drm_dp.h>
+
 #include <subdev/bios.h>
 #include <subdev/bios/init.h>
 #include <subdev/gpio.h>
@@ -634,6 +636,50 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp)
        return outp->dp.rates != 0;
 }
 
+/* XXX: This is a big fat hack: just drm_dp_read_dpcd_caps() converted to
+ * work inside nvkm.  It is a temporary holdover until we start passing the
+ * drm_dp_aux device through NVKM.
+ */
+static int
+nvkm_dp_read_dpcd_caps(struct nvkm_outp *outp)
+{
+       struct nvkm_i2c_aux *aux = outp->dp.aux;
+       u8 dpcd_ext[DP_RECEIVER_CAP_SIZE];
+       int ret;
+
+       ret = nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, DP_RECEIVER_CAP_SIZE);
+       if (ret < 0)
+               return ret;
+
+       /*
+        * Prior to DP 1.3 the bit represented by
+        * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved.
+        * If it is set, DP_DPCD_REV at 0000h could be at a value less than
+        * the true capability of the panel. The only way to check is to
+        * then compare 0000h and 2200h.
+        */
+       if (!(outp->dp.dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+             DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT))
+               return 0;
+
+       ret = nvkm_rdaux(aux, DP_DP13_DPCD_REV, dpcd_ext, sizeof(dpcd_ext));
+       if (ret < 0)
+               return ret;
+
+       if (outp->dp.dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) {
+               OUTP_DBG(outp, "Extended DPCD rev less than base DPCD rev (%d > %d)\n",
+                        outp->dp.dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]);
+               return 0;
+       }
+
+       if (!memcmp(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)))
+               return 0;
+
+       memcpy(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext));
+
+       return 0;
+}
+
 void
 nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
 {
@@ -689,7 +735,7 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
                        memset(outp->dp.lttpr, 0x00, sizeof(outp->dp.lttpr));
                }
 
-               if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, sizeof(outp->dp.dpcd))) {
+               if (!nvkm_dp_read_dpcd_caps(outp)) {
                        const u8 rates[] = { 0x1e, 0x14, 0x0a, 0x06, 0 };
                        const u8 *rate;
                        int rate_max;
index a4853c4..67ef889 100644 (file)
@@ -295,6 +295,7 @@ g94_sor = {
        .clock = nv50_sor_clock,
        .war_2 = g94_sor_war_2,
        .war_3 = g94_sor_war_3,
+       .hdmi = &g84_sor_hdmi,
        .dp = &g94_sor_dp,
 };
 
index a2c7c6f..506ffbe 100644 (file)
@@ -125,7 +125,7 @@ gt215_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 siz
        pack_hdmi_infoframe(&avi, data, size);
 
        nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
-       if (size)
+       if (!size)
                return;
 
        nvkm_wr32(device, 0x61c528 + soff, avi.header);
index dad942b..46b057f 100644 (file)
@@ -81,20 +81,29 @@ nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_
                return -ENOSYS;
 
        list_for_each_entry(outp, &conn->disp->outps, head) {
-               if (outp->info.connector == conn->index && outp->dp.aux) {
-                       if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_I2C_PLUG;
-                       if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
-                       if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ   ) bits |= NVKM_I2C_IRQ;
+               if (outp->info.connector == conn->index)
+                       break;
+       }
 
-                       return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
-                                              nvkm_uconn_uevent_aux);
-               }
+       if (&outp->head == &conn->disp->outps)
+               return -EINVAL;
+
+       if (outp->dp.aux && !outp->info.location) {
+               if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_I2C_PLUG;
+               if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
+               if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ   ) bits |= NVKM_I2C_IRQ;
+
+               return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
+                                      nvkm_uconn_uevent_aux);
        }
 
        if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_GPIO_HI;
        if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO;
-       if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ)
-               return -EINVAL;
+       if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) {
+               /* TODO: support DP IRQ on ANX9805 and remove this hack. */
+               if (!outp->info.location)
+                       return -EINVAL;
+       }
 
        return nvkm_uevent_add(uevent, &device->gpio->event, conn->info.hpd, bits,
                               nvkm_uconn_uevent_gpio);
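
The lookup above leans on a classic list idiom: when list_for_each_entry()
finishes without a break, the cursor's embedded list_head points back at
the list head itself, which is what the &outp->head == &conn->disp->outps
test detects. A standalone sketch (hypothetical item type):

    #include <linux/list.h>

    struct item {
            struct list_head head;
            int index;
    };

    static struct item *find_item(struct list_head *list, int index)
    {
            struct item *pos;

            list_for_each_entry(pos, list, head)
                    if (pos->index == index)
                            break;

            /* ran off the end: pos->head now aliases the list head */
            if (&pos->head == list)
                    return NULL;

            return pos;
    }
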
index 00dbeda..de161e7 100644 (file)
@@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110_grctx;
 void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+void gk110_grctx_generate_r419f78(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
index 94233d0..52a234b 100644 (file)
@@ -906,7 +906,9 @@ static void
 gk104_grctx_generate_r419f78(struct gf100_gr *gr)
 {
        struct nvkm_device *device = gr->base.engine.subdev.device;
-       nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+
+       /* bit 3 set disables loads in fp helper invocations; we need it enabled */
+       nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
 }
 
 void
index 4391458..3acdd9e 100644 (file)
@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
        nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
 }
 
+void
+gk110_grctx_generate_r419f78(struct gf100_gr *gr)
+{
+       struct nvkm_device *device = gr->base.engine.subdev.device;
+
+       /* bit 3 set disables loads in fp helper invocations; we need it enabled */
+       nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
+}
+
 const struct gf100_grctx_func
 gk110_grctx = {
        .main  = gf100_grctx_generate_main,
@@ -854,4 +863,5 @@ gk110_grctx = {
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
        .r419eb0 = gk110_grctx_generate_r419eb0,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index 7b9a34f..5597e87 100644 (file)
@@ -103,4 +103,5 @@ gk110b_grctx = {
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
        .r419eb0 = gk110_grctx_generate_r419eb0,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index c78d07a..6126564 100644 (file)
@@ -568,4 +568,5 @@ gk208_grctx = {
        .dist_skip_table = gf117_grctx_generate_dist_skip_table,
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r418800 = gk104_grctx_generate_r418800,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index beac66e..9906974 100644 (file)
@@ -988,4 +988,5 @@ gm107_grctx = {
        .r406500 = gm107_grctx_generate_r406500,
        .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
        .r419e00 = gm107_grctx_generate_r419e00,
+       .r419f78 = gk110_grctx_generate_r419f78,
 };
index 3b6c810..a7775aa 100644 (file)
@@ -206,19 +206,6 @@ tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
        return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack);
 }
 
-int
-tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
-{
-       int ret;
-
-       ret = gm200_gr_load(gr, ver, fwif);
-       if (ret)
-               return ret;
-
-       return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid,
-                                &gr->bundle_veid);
-}
-
 static const struct gf100_gr_fwif
 tu102_gr_fwif[] = {
        {  0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr },
index 795f3a6..9b8ca4e 100644 (file)
@@ -224,7 +224,7 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
        u64 falcons;
        int ret, i;
 
-       if (list_empty(&acr->hsfw)) {
+       if (list_empty(&acr->hsfw) || !acr->func || !acr->func->wpr_layout) {
                nvkm_debug(subdev, "No HSFW(s)\n");
                nvkm_acr_cleanup(acr);
                return 0;
index 976539d..731b2f6 100644 (file)
@@ -260,10 +260,11 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
 {
        struct nvkm_bios *bios = device->bios;
        struct nvkm_i2c *i2c;
+       struct nvkm_i2c_aux *aux;
        struct dcb_i2c_entry ccbE;
        struct dcb_output dcbE;
        u8 ver, hdr;
-       int ret, i;
+       int ret, i, ids;
 
        if (!(i2c = *pi2c = kzalloc(sizeof(*i2c), GFP_KERNEL)))
                return -ENOMEM;
@@ -406,5 +407,11 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
                }
        }
 
-       return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, i, &i2c->event);
+       ids = 0;
+       list_for_each_entry(aux, &i2c->aux, head)
+               ids = max(ids, aux->id + 1);
+       if (!ids)
+               return 0;
+
+       return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, ids, &i2c->event);
 }
index b7ccce0..fe6639c 100644 (file)
@@ -318,10 +318,6 @@ void omap_fbdev_setup(struct drm_device *dev)
 
        INIT_WORK(&fbdev->work, pan_worker);
 
-       ret = omap_fbdev_client_hotplug(&helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&helper->client);
 
        return;
index 8f4f137..2130084 100644 (file)
@@ -404,38 +404,30 @@ static int jdi_panel_add(struct jdi_panel *jdi)
 
        ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(jdi->supplies),
                                      jdi->supplies);
-       if (ret < 0) {
-               dev_err(dev, "failed to init regulator, ret=%d\n", ret);
-               return ret;
-       }
+       if (ret < 0)
+               return dev_err_probe(dev, ret,
+                                    "failed to init regulator, ret=%d\n", ret);
 
        jdi->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
        if (IS_ERR(jdi->enable_gpio)) {
-               ret = PTR_ERR(jdi->enable_gpio);
-               dev_err(dev, "cannot get enable-gpio %d\n", ret);
-               return ret;
+               return dev_err_probe(dev, PTR_ERR(jdi->enable_gpio),
+                                    "cannot get enable-gpio\n");
        }
 
        jdi->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
-       if (IS_ERR(jdi->reset_gpio)) {
-               ret = PTR_ERR(jdi->reset_gpio);
-               dev_err(dev, "cannot get reset-gpios %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(jdi->reset_gpio))
+               return dev_err_probe(dev, PTR_ERR(jdi->reset_gpio),
+                                    "cannot get reset-gpios\n");
 
        jdi->dcdc_en_gpio = devm_gpiod_get(dev, "dcdc-en", GPIOD_OUT_LOW);
-       if (IS_ERR(jdi->dcdc_en_gpio)) {
-               ret = PTR_ERR(jdi->dcdc_en_gpio);
-               dev_err(dev, "cannot get dcdc-en-gpio %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(jdi->dcdc_en_gpio))
+               return dev_err_probe(dev, PTR_ERR(jdi->dcdc_en_gpio),
+                                    "cannot get dcdc-en-gpio\n");
 
        jdi->backlight = drm_panel_create_dsi_backlight(jdi->dsi);
-       if (IS_ERR(jdi->backlight)) {
-               ret = PTR_ERR(jdi->backlight);
-               dev_err(dev, "failed to register backlight %d\n", ret);
-               return ret;
-       }
+       if (IS_ERR(jdi->backlight))
+               return dev_err_probe(dev, PTR_ERR(jdi->backlight),
+                                    "failed to register backlight\n");
 
        drm_panel_init(&jdi->base, &jdi->dsi->dev, &jdi_panel_funcs,
                       DRM_MODE_CONNECTOR_DSI);
index 102e1fc..be4ec5b 100644 (file)
@@ -569,6 +569,7 @@ static const struct of_device_id s6d7aa0_of_match[] = {
        },
        { /* sentinel */ }
 };
+MODULE_DEVICE_TABLE(of, s6d7aa0_of_match);
 
 static struct mipi_dsi_driver s6d7aa0_driver = {
        .probe = s6d7aa0_probe,
index a247a0e..b38d0e9 100644 (file)
@@ -999,21 +999,21 @@ static const struct panel_desc auo_g104sn02 = {
        .connector_type = DRM_MODE_CONNECTOR_LVDS,
 };
 
-static const struct drm_display_mode auo_g121ean01_mode = {
-       .clock = 66700,
-       .hdisplay = 1280,
-       .hsync_start = 1280 + 58,
-       .hsync_end = 1280 + 58 + 8,
-       .htotal = 1280 + 58 + 8 + 70,
-       .vdisplay = 800,
-       .vsync_start = 800 + 6,
-       .vsync_end = 800 + 6 + 4,
-       .vtotal = 800 + 6 + 4 + 10,
+static const struct display_timing auo_g121ean01_timing = {
+       .pixelclock = { 60000000, 74400000, 90000000 },
+       .hactive = { 1280, 1280, 1280 },
+       .hfront_porch = { 20, 50, 100 },
+       .hback_porch = { 20, 50, 100 },
+       .hsync_len = { 30, 100, 200 },
+       .vactive = { 800, 800, 800 },
+       .vfront_porch = { 2, 10, 25 },
+       .vback_porch = { 2, 10, 25 },
+       .vsync_len = { 4, 18, 50 },
 };
 
 static const struct panel_desc auo_g121ean01 = {
-       .modes = &auo_g121ean01_mode,
-       .num_modes = 1,
+       .timings = &auo_g121ean01_timing,
+       .num_timings = 1,
        .bpc = 8,
        .size = {
                .width = 261,
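
Unlike a fixed drm_display_mode, every display_timing field above is a
{ min, typical, max } triple, so the conversion records the ranges the
panel tolerates rather than one mode: the pixel clock, for instance, may
run anywhere from 60 to 90 MHz with 74.4 MHz typical. A consumer that
needs one concrete value generally takes the typicals, e.g. (a sketch,
assuming the display_timing helpers):

    #include <video/display_timing.h>

    static unsigned int pick_pixelclock(const struct display_timing *dt)
    {
            return dt->pixelclock.typ;      /* 74400000 for this panel */
    }
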
@@ -2178,6 +2178,7 @@ static const struct panel_desc innolux_at043tn24 = {
                .height = 54,
        },
        .bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+       .connector_type = DRM_MODE_CONNECTOR_DPI,
        .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
@@ -3202,6 +3203,7 @@ static const struct drm_display_mode powertip_ph800480t013_idf02_mode = {
        .vsync_start = 480 + 49,
        .vsync_end = 480 + 49 + 2,
        .vtotal = 480 + 49 + 2 + 22,
+       .flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
 };
 
 static const struct panel_desc powertip_ph800480t013_idf02  = {
index 58dfb15..e78de99 100644 (file)
@@ -96,7 +96,7 @@ static int panfrost_read_speedbin(struct device *dev)
                 * keep going without it; any other error means that we are
                 * supposed to read the bin value, but we failed doing so.
                 */
-               if (ret != -ENOENT) {
+               if (ret != -ENOENT && ret != -EOPNOTSUPP) {
                        DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", ret);
                        return ret;
                }
index ea993d7..307a890 100644 (file)
@@ -310,7 +310,7 @@ int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
                                      u32 domain,
                                      size_t size,
                                      struct qxl_surface *surf,
-                                     struct qxl_bo **qobj,
+                                     struct drm_gem_object **gobj,
                                      uint32_t *handle);
 void qxl_gem_object_free(struct drm_gem_object *gobj);
 int qxl_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv);
index d636ba6..17df5c7 100644 (file)
@@ -34,6 +34,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
 {
        struct qxl_device *qdev = to_qxl(dev);
        struct qxl_bo *qobj;
+       struct drm_gem_object *gobj;
        uint32_t handle;
        int r;
        struct qxl_surface surf;
@@ -62,11 +63,13 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
 
        r = qxl_gem_object_create_with_handle(qdev, file_priv,
                                              QXL_GEM_DOMAIN_CPU,
-                                             args->size, &surf, &qobj,
+                                             args->size, &surf, &gobj,
                                              &handle);
        if (r)
                return r;
+       qobj = gem_to_qxl_bo(gobj);
        qobj->is_dumb = true;
+       drm_gem_object_put(gobj);
        args->pitch = pitch;
        args->handle = handle;
        return 0;
index a08da0b..fc5e376 100644 (file)
@@ -72,32 +72,41 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size,
        return 0;
 }
 
+/*
+ * If the caller passed a valid gobj pointer, it is responsible for
+ * calling drm_gem_object_put() when it no longer needs to access the
+ * object.
+ *
+ * If gobj is NULL, the reference is dropped internally.
+ */
 int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
                                      struct drm_file *file_priv,
                                      u32 domain,
                                      size_t size,
                                      struct qxl_surface *surf,
-                                     struct qxl_bo **qobj,
+                                     struct drm_gem_object **gobj,
                                      uint32_t *handle)
 {
-       struct drm_gem_object *gobj;
        int r;
+       struct drm_gem_object *local_gobj;
 
-       BUG_ON(!qobj);
        BUG_ON(!handle);
 
        r = qxl_gem_object_create(qdev, size, 0,
                                  domain,
                                  false, false, surf,
-                                 &gobj);
+                                 &local_gobj);
        if (r)
                return -ENOMEM;
-       r = drm_gem_handle_create(file_priv, gobj, handle);
+       r = drm_gem_handle_create(file_priv, local_gobj, handle);
        if (r)
                return r;
-       /* drop reference from allocate - handle holds it now */
-       *qobj = gem_to_qxl_bo(gobj);
-       drm_gem_object_put(gobj);
+
+       if (gobj)
+               *gobj = local_gobj;
+       else
+               /* drop reference from allocate - handle holds it now */
+               drm_gem_object_put(local_gobj);
+
        return 0;
 }
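
The reworked helper encodes a reference-ownership rule: the handle created
by drm_gem_handle_create() always owns one reference, and the allocation
reference is either handed back to the caller (gobj non-NULL) or dropped
on the spot. Schematically (hypothetical allocator, trimmed error
handling; a sketch, not the qxl code):

    #include <drm/drm_gem.h>

    static int create_with_handle(struct drm_file *file, size_t size,
                                  struct drm_gem_object **out, u32 *handle)
    {
            struct drm_gem_object *obj = my_alloc_gem(size); /* ref = 1 */
            int ret;

            if (IS_ERR(obj))
                    return PTR_ERR(obj);

            ret = drm_gem_handle_create(file, obj, handle); /* handle ref */
            if (ret)
                    goto put;

            if (out) {
                    *out = obj;     /* caller must drm_gem_object_put() */
                    return 0;
            }
    put:
            drm_gem_object_put(obj);        /* drop the allocation ref */
            return ret;
    }
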
 
index 30f58b2..dd0f834 100644 (file)
@@ -38,7 +38,6 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr
        struct qxl_device *qdev = to_qxl(dev);
        struct drm_qxl_alloc *qxl_alloc = data;
        int ret;
-       struct qxl_bo *qobj;
        uint32_t handle;
        u32 domain = QXL_GEM_DOMAIN_VRAM;
 
@@ -50,7 +49,7 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr
                                                domain,
                                                qxl_alloc->size,
                                                NULL,
-                                               &qobj, &handle);
+                                               NULL, &handle);
        if (ret) {
                DRM_ERROR("%s: failed to create gem ret=%d\n",
                          __func__, ret);
@@ -386,7 +385,6 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
 {
        struct qxl_device *qdev = to_qxl(dev);
        struct drm_qxl_alloc_surf *param = data;
-       struct qxl_bo *qobj;
        int handle;
        int ret;
        int size, actual_stride;
@@ -406,7 +404,7 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
                                                QXL_GEM_DOMAIN_SURFACE,
                                                size,
                                                &surf,
-                                               &qobj, &handle);
+                                               NULL, &handle);
        if (ret) {
                DRM_ERROR("%s: failed to create gem ret=%d\n",
                          __func__, ret);
index ab9c1ab..f941e2e 100644 (file)
@@ -383,10 +383,6 @@ void radeon_fbdev_setup(struct radeon_device *rdev)
                goto err_drm_client_init;
        }
 
-       ret = radeon_fbdev_client_hotplug(&fb_helper->client);
-       if (ret)
-               drm_dbg_kms(rdev->ddev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&fb_helper->client);
 
        return;
index a530ecc..bf34498 100644 (file)
@@ -833,12 +833,12 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
         * need align with 2 pixel.
         */
        if (fb->format->is_yuv && ((new_plane_state->src.x1 >> 16) % 2)) {
-               DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
+               DRM_DEBUG_KMS("Invalid Source: Yuv format not support odd xpos\n");
                return -EINVAL;
        }
 
        if (fb->format->is_yuv && new_plane_state->rotation & DRM_MODE_REFLECT_Y) {
-               DRM_ERROR("Invalid Source: Yuv format does not support this rotation\n");
+               DRM_DEBUG_KMS("Invalid Source: Yuv format does not support this rotation\n");
                return -EINVAL;
        }
 
@@ -846,7 +846,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
                struct vop *vop = to_vop(crtc);
 
                if (!vop->data->afbc) {
-                       DRM_ERROR("vop does not support AFBC\n");
+                       DRM_DEBUG_KMS("vop does not support AFBC\n");
                        return -EINVAL;
                }
 
@@ -855,15 +855,16 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
                        return ret;
 
                if (new_plane_state->src.x1 || new_plane_state->src.y1) {
-                       DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n",
-                                 new_plane_state->src.x1,
-                                 new_plane_state->src.y1, fb->offsets[0]);
+                       DRM_DEBUG_KMS("AFBC does not support offset display, " \
+                                     "xpos=%d, ypos=%d, offset=%d\n",
+                                     new_plane_state->src.x1, new_plane_state->src.y1,
+                                     fb->offsets[0]);
                        return -EINVAL;
                }
 
                if (new_plane_state->rotation && new_plane_state->rotation != DRM_MODE_ROTATE_0) {
-                       DRM_ERROR("No rotation support in AFBC, rotation=%d\n",
-                                 new_plane_state->rotation);
+                       DRM_DEBUG_KMS("No rotation support in AFBC, rotation=%d\n",
+                                     new_plane_state->rotation);
                        return -EINVAL;
                }
        }
index b2bbc8a..a42763e 100644 (file)
@@ -176,16 +176,32 @@ static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
 {
        struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
                                                 finish_cb);
-       int r;
+       unsigned long index;
 
        dma_fence_put(f);
 
        /* Wait for all dependencies to avoid data corruptions */
-       while (!xa_empty(&job->dependencies)) {
-               f = xa_erase(&job->dependencies, job->last_dependency++);
-               r = dma_fence_add_callback(f, &job->finish_cb,
-                                          drm_sched_entity_kill_jobs_cb);
-               if (!r)
+       xa_for_each(&job->dependencies, index, f) {
+               struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+
+               if (s_fence && f == &s_fence->scheduled) {
+                       /* The dependencies array had a reference on the scheduled
+                        * fence, and the finished fence refcount might have
+                        * dropped to zero. Use dma_fence_get_rcu() so we get
+                        * a NULL fence in that case.
+                        */
+                       f = dma_fence_get_rcu(&s_fence->finished);
+
+                       /* Now that we have a reference on the finished fence,
+                        * we can release the reference the dependencies array
+                        * had on the scheduled fence.
+                        */
+                       dma_fence_put(&s_fence->scheduled);
+               }
+
+               xa_erase(&job->dependencies, index);
+               if (f && !dma_fence_add_callback(f, &job->finish_cb,
+                                                drm_sched_entity_kill_jobs_cb))
                        return;
 
                dma_fence_put(f);
@@ -415,8 +431,17 @@ static struct dma_fence *
 drm_sched_job_dependency(struct drm_sched_job *job,
                         struct drm_sched_entity *entity)
 {
-       if (!xa_empty(&job->dependencies))
-               return xa_erase(&job->dependencies, job->last_dependency++);
+       struct dma_fence *f;
+
+       /* We keep the fence around, so we can iterate over all dependencies
+        * in drm_sched_entity_kill_jobs_cb() to ensure all deps are signaled
+        * before killing the job.
+        */
+       f = xa_load(&job->dependencies, job->last_dependency);
+       if (f) {
+               job->last_dependency++;
+               return dma_fence_get(f);
+       }
 
        if (job->sched->ops->prepare_job)
                return job->sched->ops->prepare_job(job, entity);
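
/*
 * Illustration only (userspace analogue, not part of the patch): the
 * "peek and take a reference" pattern the scheduler moves to above.
 * Dependencies used to be erased (xa_erase) as they were consumed; now
 * the consumer only advances a cursor and takes its own reference
 * (xa_load + dma_fence_get), so a later kill path can still walk the
 * full set. All names below (dep, dep_get, dep_put) are invented.
 */
#include <stdio.h>
#include <stdlib.h>

struct dep { int refs; int id; };

static struct dep *dep_get(struct dep *d) { if (d) d->refs++; return d; }

static void dep_put(struct dep *d)
{
	if (d && --d->refs == 0) {
		printf("free dep %d\n", d->id);
		free(d);
	}
}

/* Peek the next dependency without removing it, mirroring xa_load(). */
static struct dep *next_dependency(struct dep **deps, size_t n, size_t *cursor)
{
	if (*cursor >= n)
		return NULL;
	return dep_get(deps[(*cursor)++]);	/* the caller owns this reference */
}

int main(void)
{
	struct dep *deps[2];
	size_t cursor = 0, i;
	struct dep *d;

	for (i = 0; i < 2; i++) {
		deps[i] = malloc(sizeof(*deps[i]));
		deps[i]->refs = 1;		/* reference held by the "array" */
		deps[i]->id = (int)i;
	}

	while ((d = next_dependency(deps, 2, &cursor)))
		dep_put(d);			/* consume the peeked reference */

	for (i = 0; i < 2; i++)
		dep_put(deps[i]);		/* the array drops its own references */
	return 0;
}
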
index ef12047..06cedfe 100644 (file)
@@ -48,8 +48,32 @@ static void __exit drm_sched_fence_slab_fini(void)
        kmem_cache_destroy(sched_fence_slab);
 }
 
-void drm_sched_fence_scheduled(struct drm_sched_fence *fence)
+static void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
+                                      struct dma_fence *fence)
 {
+       /*
+        * smp_store_release() to ensure another thread racing us
+        * in drm_sched_fence_set_deadline_finished() sees the
+        * fence's parent set before test_bit()
+        */
+       smp_store_release(&s_fence->parent, dma_fence_get(fence));
+       if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT,
+                    &s_fence->finished.flags))
+               dma_fence_set_deadline(fence, s_fence->deadline);
+}
+
+void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
+                              struct dma_fence *parent)
+{
+       /* Set the parent before signaling the scheduled fence, such that,
+        * any waiter expecting the parent to be filled after the job has
+        * been scheduled (which is the case for drivers delegating waits
+        * to some firmware) doesn't have to busy wait for parent to show
+        * up.
+        */
+       if (!IS_ERR_OR_NULL(parent))
+               drm_sched_fence_set_parent(fence, parent);
+
        dma_fence_signal(&fence->scheduled);
 }
 
@@ -181,20 +205,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f)
 }
 EXPORT_SYMBOL(to_drm_sched_fence);
 
-void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
-                               struct dma_fence *fence)
-{
-       /*
-        * smp_store_release() to ensure another thread racing us
-        * in drm_sched_fence_set_deadline_finished() sees the
-        * fence's parent set before test_bit()
-        */
-       smp_store_release(&s_fence->parent, dma_fence_get(fence));
-       if (test_bit(DRM_SCHED_FENCE_FLAG_HAS_DEADLINE_BIT,
-                    &s_fence->finished.flags))
-               dma_fence_set_deadline(fence, s_fence->deadline);
-}
-
 struct drm_sched_fence *drm_sched_fence_alloc(struct drm_sched_entity *entity,
                                              void *owner)
 {
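
/*
 * Illustration only (userspace analogue, not part of the patch): the
 * publish-with-release / read-with-acquire pairing that the comment in
 * drm_sched_fence_set_parent() describes, reduced to C11 atomics. The
 * writer makes the pointer visible with a release store; a reader that
 * loads it with acquire is guaranteed to see the fully initialised
 * object behind it. This is a simplified model of the race; all names
 * are invented for the sketch.
 */
#include <stdatomic.h>
#include <stdio.h>

struct parent { int ready; };

static struct parent the_parent;
static _Atomic(struct parent *) published;

static void writer(void)
{
	the_parent.ready = 1;	/* plain init, ordered by the release below */
	atomic_store_explicit(&published, &the_parent, memory_order_release);
}

static void reader(void)
{
	struct parent *p =
		atomic_load_explicit(&published, memory_order_acquire);

	if (p)			/* if we see the pointer, we see ready == 1 */
		printf("ready=%d\n", p->ready);
}

int main(void)
{
	writer();
	reader();
	return 0;
}
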
index 7b2bfc1..506371c 100644 (file)
@@ -1043,10 +1043,9 @@ static int drm_sched_main(void *param)
                trace_drm_run_job(sched_job, entity);
                fence = sched->ops->run_job(sched_job);
                complete_all(&entity->entity_idle);
-               drm_sched_fence_scheduled(s_fence);
+               drm_sched_fence_scheduled(s_fence, fence);
 
                if (!IS_ERR_OR_NULL(fence)) {
-                       drm_sched_fence_set_parent(s_fence, fence);
                        /* Drop for original kref_init of the fence */
                        dma_fence_put(fence);
 
index e74d9be..d042234 100644 (file)
@@ -225,10 +225,6 @@ void tegra_fbdev_setup(struct drm_device *dev)
        if (ret)
                goto err_drm_client_init;
 
-       ret = tegra_fbdev_client_hotplug(&helper->client);
-       if (ret)
-               drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
        drm_client_register(&helper->client);
 
        return;
index bd5dae4..54e3083 100644 (file)
@@ -458,18 +458,18 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo,
                goto out;
        }
 
-bounce:
-       ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
-       if (ret == -EMULTIHOP) {
+       do {
+               ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop);
+               if (ret != -EMULTIHOP)
+                       break;
+
                ret = ttm_bo_bounce_temp_buffer(bo, &evict_mem, ctx, &hop);
-               if (ret) {
-                       if (ret != -ERESTARTSYS && ret != -EINTR)
-                               pr_err("Buffer eviction failed\n");
-                       ttm_resource_free(bo, &evict_mem);
-                       goto out;
-               }
-               /* try and move to final place now. */
-               goto bounce;
+       } while (!ret);
+
+       if (ret) {
+               ttm_resource_free(bo, &evict_mem);
+               if (ret != -ERESTARTSYS && ret != -EINTR)
+                       pr_err("Buffer eviction failed\n");
        }
 out:
        return ret;
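
/*
 * Illustration only (userspace sketch, not part of the patch): the
 * control-flow shape of the eviction rework above. A backward "bounce:"
 * goto becomes a do/while that retries the move while the bounce step
 * keeps succeeding and the move keeps asking for another hop, so error
 * handling collapses into one place after the loop. EMULTIHOP_ERR is a
 * stand-in for the kernel's -EMULTIHOP.
 */
#include <stdio.h>

#define EMULTIHOP_ERR 72	/* stand-in for -EMULTIHOP */

static int try_move(int *hops) { return (*hops)-- > 0 ? -EMULTIHOP_ERR : 0; }
static int bounce(void) { return 0; }	/* allocate a temporary placement */

int evict(void)
{
	int hops = 2, ret;

	do {
		ret = try_move(&hops);
		if (ret != -EMULTIHOP_ERR)
			break;		/* success or a real error */
		ret = bounce();
	} while (!ret);

	if (ret)
		fprintf(stderr, "eviction failed: %d\n", ret);
	return ret;
}

int main(void) { return evict(); }
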
@@ -517,6 +517,13 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
 {
        bool ret = false;
 
+       if (bo->pin_count) {
+               *locked = false;
+               if (busy)
+                       *busy = false;
+               return false;
+       }
+
        if (bo->base.resv == ctx->resv) {
                dma_resv_assert_held(bo->base.resv);
                if (ctx->allow_res_evict)
@@ -1167,6 +1174,7 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
                ret = ttm_bo_handle_move_mem(bo, evict_mem, true, &ctx, &hop);
                if (unlikely(ret != 0)) {
                        WARN(ret == -EMULTIHOP, "Unexpected multihop in swapout - likely driver bug.\n");
+                       ttm_resource_free(bo, &evict_mem);
                        goto out;
                }
        }
index 7333f7a..46ff9c7 100644 (file)
@@ -86,6 +86,8 @@ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos,
                                       struct ttm_resource *res)
 {
        if (pos->last != res) {
+               if (pos->first == res)
+                       pos->first = list_next_entry(res, lru);
                list_move(&res->lru, &pos->last->lru);
                pos->last = res;
        }
@@ -111,7 +113,8 @@ static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
 {
        struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
 
-       if (unlikely(pos->first == res && pos->last == res)) {
+       if (unlikely(WARN_ON(!pos->first || !pos->last) ||
+                    (pos->first == res && pos->last == res))) {
                pos->first = NULL;
                pos->last = NULL;
        } else if (pos->first == res) {
index 82094c1..c438535 100644 (file)
@@ -497,10 +497,9 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp,
                if (!(flags & drm_vmw_synccpu_allow_cs)) {
                        atomic_dec(&vmw_bo->cpu_writers);
                }
-               ttm_bo_put(&vmw_bo->tbo);
+               vmw_user_bo_unref(vmw_bo);
        }
 
-       drm_gem_object_put(&vmw_bo->tbo.base);
        return ret;
 }
 
@@ -540,8 +539,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
                        return ret;
 
                ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
-               vmw_bo_unreference(&vbo);
-               drm_gem_object_put(&vbo->tbo.base);
+               vmw_user_bo_unref(vbo);
                if (unlikely(ret != 0)) {
                        if (ret == -ERESTARTSYS || ret == -EBUSY)
                                return -EBUSY;
index 50a836e..1d433fc 100644 (file)
@@ -195,6 +195,14 @@ static inline struct vmw_bo *vmw_bo_reference(struct vmw_bo *buf)
        return buf;
 }
 
+static inline void vmw_user_bo_unref(struct vmw_bo *vbo)
+{
+       if (vbo) {
+               ttm_bo_put(&vbo->tbo);
+               drm_gem_object_put(&vbo->tbo.base);
+       }
+}
+
 static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj)
 {
        return container_of((gobj), struct vmw_bo, tbo.base);
index 3810a99..58bfdf2 100644 (file)
@@ -1513,4 +1513,16 @@ static inline bool vmw_has_fences(struct vmw_private *vmw)
        return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
 }
 
+static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model,
+                                          u32 shader_type)
+{
+       SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX;
+
+       if (shader_model >= VMW_SM_5)
+               max_allowed = SVGA3D_SHADERTYPE_MAX;
+       else if (shader_model >= VMW_SM_4)
+               max_allowed = SVGA3D_SHADERTYPE_DX10_MAX;
+       return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed;
+}
+
 #endif
index 6b9aa2b..98e0723 100644 (file)
@@ -1164,8 +1164,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
        }
        vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
        ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
-       ttm_bo_put(&vmw_bo->tbo);
-       drm_gem_object_put(&vmw_bo->tbo.base);
+       vmw_user_bo_unref(vmw_bo);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1221,8 +1220,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
        vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
                             VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
        ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
-       ttm_bo_put(&vmw_bo->tbo);
-       drm_gem_object_put(&vmw_bo->tbo.base);
+       vmw_user_bo_unref(vmw_bo);
        if (unlikely(ret != 0))
                return ret;
 
@@ -1992,7 +1990,7 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
 
        cmd = container_of(header, typeof(*cmd), header);
 
-       if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) {
+       if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) {
                VMW_DEBUG_USER("Illegal shader type %u.\n",
                               (unsigned int) cmd->body.type);
                return -EINVAL;
@@ -2115,8 +2113,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
                                      SVGA3dCmdHeader *header)
 {
        VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer);
-       SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ?
-               SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10;
 
        struct vmw_resource *res = NULL;
        struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
@@ -2133,6 +2129,14 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
        if (unlikely(ret != 0))
                return ret;
 
+       if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) ||
+           cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
+               VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
+                              (unsigned int) cmd->body.type,
+                              (unsigned int) cmd->body.slot);
+               return -EINVAL;
+       }
+
        binding.bi.ctx = ctx_node->ctx;
        binding.bi.res = res;
        binding.bi.bt = vmw_ctx_binding_cb;
@@ -2141,14 +2145,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
        binding.size = cmd->body.sizeInBytes;
        binding.slot = cmd->body.slot;
 
-       if (binding.shader_slot >= max_shader_num ||
-           binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
-               VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
-                              (unsigned int) cmd->body.type,
-                              (unsigned int) binding.slot);
-               return -EINVAL;
-       }
-
        vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot,
                        binding.slot);
 
@@ -2207,15 +2203,13 @@ static int vmw_cmd_dx_set_shader_res(struct vmw_private *dev_priv,
 {
        VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) =
                container_of(header, typeof(*cmd), header);
-       SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
-               SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
 
        u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) /
                sizeof(SVGA3dShaderResourceViewId);
 
        if ((u64) cmd->body.startView + (u64) num_sr_view >
            (u64) SVGA3D_DX_MAX_SRVIEWS ||
-           cmd->body.type >= max_allowed) {
+           !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
                VMW_DEBUG_USER("Invalid shader binding.\n");
                return -EINVAL;
        }
@@ -2239,8 +2233,6 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
                                 SVGA3dCmdHeader *header)
 {
        VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader);
-       SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
-               SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
        struct vmw_resource *res = NULL;
        struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
        struct vmw_ctx_bindinfo_shader binding;
@@ -2251,8 +2243,7 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
 
        cmd = container_of(header, typeof(*cmd), header);
 
-       if (cmd->body.type >= max_allowed ||
-           cmd->body.type < SVGA3D_SHADERTYPE_MIN) {
+       if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
                VMW_DEBUG_USER("Illegal shader type %u.\n",
                               (unsigned int) cmd->body.type);
                return -EINVAL;
index b62207b..1489ad7 100644 (file)
@@ -1665,10 +1665,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 
 err_out:
        /* vmw_user_lookup_handle takes one ref so does new_fb */
-       if (bo) {
-               vmw_bo_unreference(&bo);
-               drm_gem_object_put(&bo->tbo.base);
-       }
+       if (bo)
+               vmw_user_bo_unref(bo);
        if (surface)
                vmw_surface_unreference(&surface);
 
index 7e11231..fb85f24 100644 (file)
@@ -451,8 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
 
        ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
 
-       vmw_bo_unreference(&buf);
-       drm_gem_object_put(&buf->tbo.base);
+       vmw_user_bo_unref(buf);
 
 out_unlock:
        mutex_unlock(&overlay->mutex);
index e7226db..1e81ff2 100644 (file)
@@ -809,8 +809,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
                                    shader_type, num_input_sig,
                                    num_output_sig, tfile, shader_handle);
 out_bad_arg:
-       vmw_bo_unreference(&buffer);
-       drm_gem_object_put(&buffer->tbo.base);
+       vmw_user_bo_unref(buffer);
        return ret;
 }
 
index 6f0d332..06bdcf0 100644 (file)
@@ -132,29 +132,45 @@ static void get_common_inputs(struct common_input_property *common, int report_i
        common->event_type = HID_USAGE_SENSOR_EVENT_DATA_UPDATED_ENUM;
 }
 
-static int float_to_int(u32 float32)
+static int float_to_int(u32 flt32_val)
 {
        int fraction, shift, mantissa, sign, exp, zeropre;
 
-       mantissa = float32 & GENMASK(22, 0);
-       sign = (float32 & BIT(31)) ? -1 : 1;
-       exp = (float32 & ~BIT(31)) >> 23;
+       mantissa = flt32_val & GENMASK(22, 0);
+       sign = (flt32_val & BIT(31)) ? -1 : 1;
+       exp = (flt32_val & ~BIT(31)) >> 23;
 
        if (!exp && !mantissa)
                return 0;
 
+       /*
+        * Calculate the exponent and fraction part of floating
+        * point representation.
+        */
        exp -= 127;
        if (exp < 0) {
                exp = -exp;
+               if (exp >= BITS_PER_TYPE(u32))
+                       return 0;
                zeropre = (((BIT(23) + mantissa) * 100) >> 23) >> exp;
                return zeropre >= 50 ? sign : 0;
        }
 
        shift = 23 - exp;
-       float32 = BIT(exp) + (mantissa >> shift);
-       fraction = mantissa & GENMASK(shift - 1, 0);
+       if (abs(shift) >= BITS_PER_TYPE(u32))
+               return 0;
+
+       if (shift < 0) {
+               shift = -shift;
+               flt32_val = BIT(exp) + (mantissa << shift);
+               shift = 0;
+       } else {
+               flt32_val = BIT(exp) + (mantissa >> shift);
+       }
+
+       fraction = (shift == 0) ? 0 : mantissa & GENMASK(shift - 1, 0);
 
-       return (((fraction * 100) >> shift) >= 50) ? sign * (float32 + 1) : sign * float32;
+       return (((fraction * 100) >> shift) >= 50) ? sign * (flt32_val + 1) : sign * flt32_val;
 }
 
 static u8 get_input_rep(u8 current_index, int sensor_idx, int report_id,
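
/*
 * Illustration only (userspace check, not part of the patch): the
 * bit-twiddling float_to_int() performs, with the same shift guards the
 * fix above adds (a shift count of 32 or more on a 32-bit value is
 * undefined behaviour in C). The exp > 30 cutoff is sketch-specific, to
 * keep the result within int range.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static int float_bits_to_int(uint32_t v)
{
	int mantissa = v & 0x7fffff;
	int sign = (v & 0x80000000u) ? -1 : 1;
	int exp = (v & 0x7fffffffu) >> 23;
	int shift, fraction, zeropre;
	uint32_t mag;

	if (!exp && !mantissa)
		return 0;

	exp -= 127;
	if (exp < 0) {
		exp = -exp;
		if (exp >= 32)			/* |value| < 2^-31 rounds to 0 */
			return 0;
		zeropre = ((((1 << 23) + mantissa) * 100) >> 23) >> exp;
		return zeropre >= 50 ? sign : 0;
	}

	if (exp > 30)				/* would overflow int */
		return 0;

	shift = 23 - exp;
	if (shift < 0) {
		mag = (1u << exp) + ((uint32_t)mantissa << -shift);
		fraction = 0;
		shift = 0;
	} else {
		mag = (1u << exp) + ((uint32_t)mantissa >> shift);
		fraction = shift ? (mantissa & (int)((1u << shift) - 1)) : 0;
	}
	return (((fraction * 100) >> shift) >= 50) ?
		sign * (int)(mag + 1) : sign * (int)mag;
}

int main(void)
{
	float samples[] = { 0.0f, 0.75f, -1.5f, 42.49f, 42.51f };
	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		uint32_t bits;

		memcpy(&bits, &samples[i], sizeof(bits));
		printf("%8.2f -> %d\n", samples[i], float_bits_to_int(bits));
	}
	return 0;
}
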
index 49d4a26..f33485d 100644 (file)
@@ -258,19 +258,17 @@ static void mousevsc_on_receive(struct hv_device *device,
 
        switch (hid_msg_hdr->type) {
        case SYNTH_HID_PROTOCOL_RESPONSE:
+               len = struct_size(pipe_msg, data, pipe_msg->size);
+
                /*
                 * While it will be impossible for us to protect against
                 * malicious/buggy hypervisor/host, add a check here to
                 * ensure we don't corrupt memory.
                 */
-               if (struct_size(pipe_msg, data, pipe_msg->size)
-                       > sizeof(struct mousevsc_prt_msg)) {
-                       WARN_ON(1);
+               if (WARN_ON(len > sizeof(struct mousevsc_prt_msg)))
                        break;
-               }
 
-               memcpy(&input_dev->protocol_resp, pipe_msg,
-                               struct_size(pipe_msg, data, pipe_msg->size));
+               memcpy(&input_dev->protocol_resp, pipe_msg, len);
                complete(&input_dev->wait_event);
                break;
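
/*
 * Illustration only (userspace analogue, not part of the patch): what
 * computing the struct_size() once buys in the hunk above. Deriving the
 * flexible-array message length a single time and reusing it for both
 * the bounds check and the memcpy() keeps the two from drifting apart.
 * Note the kernel's struct_size() additionally saturates on integer
 * overflow; this sketch omits that, and all types here are stand-ins.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct pipe_msg {
	uint32_t type;
	uint32_t size;		/* number of bytes in data[] */
	uint8_t data[];		/* flexible array member */
};

#define MSG_SIZE(msg) (offsetof(struct pipe_msg, data) + (msg)->size)

static int consume(const struct pipe_msg *msg, void *dst, size_t dst_size)
{
	size_t len = MSG_SIZE(msg);	/* computed once... */

	if (len > dst_size)		/* ...checked here... */
		return -1;

	memcpy(dst, msg, len);		/* ...and copied with the same value */
	return 0;
}

int main(void)
{
	struct pipe_msg *msg = malloc(sizeof(*msg) + 8);
	uint8_t out[32];

	msg->type = 0;
	msg->size = 4;
	printf("consume ok: %d\n", consume(msg, out, sizeof(out)) == 0);
	free(msg);
	return 0;
}
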
 
index a1d2690..851ee86 100644 (file)
@@ -1093,6 +1093,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
                case 0x074: map_key_clear(KEY_BRIGHTNESS_MAX);          break;
                case 0x075: map_key_clear(KEY_BRIGHTNESS_AUTO);         break;
 
+               case 0x076: map_key_clear(KEY_CAMERA_ACCESS_ENABLE);    break;
+               case 0x077: map_key_clear(KEY_CAMERA_ACCESS_DISABLE);   break;
+               case 0x078: map_key_clear(KEY_CAMERA_ACCESS_TOGGLE);    break;
+
                case 0x079: map_key_clear(KEY_KBDILLUMUP);      break;
                case 0x07a: map_key_clear(KEY_KBDILLUMDOWN);    break;
                case 0x07c: map_key_clear(KEY_KBDILLUMTOGGLE);  break;
@@ -1139,9 +1143,6 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
                case 0x0cd: map_key_clear(KEY_PLAYPAUSE);       break;
                case 0x0cf: map_key_clear(KEY_VOICECOMMAND);    break;
 
-               case 0x0d5: map_key_clear(KEY_CAMERA_ACCESS_ENABLE);            break;
-               case 0x0d6: map_key_clear(KEY_CAMERA_ACCESS_DISABLE);           break;
-               case 0x0d7: map_key_clear(KEY_CAMERA_ACCESS_TOGGLE);            break;
                case 0x0d8: map_key_clear(KEY_DICTATE);         break;
                case 0x0d9: map_key_clear(KEY_EMOJI_PICKER);    break;
 
index dfe8e09..129b01b 100644 (file)
@@ -4598,6 +4598,8 @@ static const struct hid_device_id hidpp_devices[] = {
 
        { /* Logitech G403 Wireless Gaming Mouse over USB */
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC082) },
+       { /* Logitech G502 Lightspeed Wireless Gaming Mouse over USB */
+         HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC08D) },
        { /* Logitech G703 Gaming Mouse over USB */
          HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, 0xC087) },
        { /* Logitech G703 Hero Gaming Mouse over USB */
index 85700ce..a928ad2 100644 (file)
@@ -63,12 +63,12 @@ static_assert(sizeof(enum thunderstrike_led_state) == 1);
 struct thunderstrike_hostcmd_board_info {
        __le16 revision;
        __le16 serial[7];
-};
+} __packed;
 
 struct thunderstrike_hostcmd_haptics {
        u8 motor_left;
        u8 motor_right;
-};
+} __packed;
 
 struct thunderstrike_hostcmd_resp_report {
        u8 report_id; /* THUNDERSTRIKE_HOSTCMD_RESP_REPORT_ID */
@@ -81,7 +81,7 @@ struct thunderstrike_hostcmd_resp_report {
                __le16 fw_version;
                enum thunderstrike_led_state led_state;
                u8 payload[30];
-       };
+       } __packed;
 } __packed;
 static_assert(sizeof(struct thunderstrike_hostcmd_resp_report) ==
              THUNDERSTRIKE_HOSTCMD_REPORT_SIZE);
@@ -92,15 +92,15 @@ struct thunderstrike_hostcmd_req_report {
        u8 reserved_at_10;
 
        union {
-               struct {
+               struct __packed {
                        u8 update;
                        enum thunderstrike_led_state state;
                } led;
-               struct {
+               struct __packed {
                        u8 update;
                        struct thunderstrike_hostcmd_haptics motors;
                } haptics;
-       };
+       } __packed;
        u8 reserved_at_30[27];
 } __packed;
 static_assert(sizeof(struct thunderstrike_hostcmd_req_report) ==
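
/*
 * Illustration only (userspace sketch, not part of the patch): why the
 * __packed annotations above matter. Without packing, the compiler may
 * insert padding (here between the u8 and the u16), so the struct no
 * longer matches the device's wire format byte for byte. A compile-time
 * assert catches the mismatch, just like the static_assert() calls in
 * the driver. __attribute__((packed)) is the GCC/Clang spelling behind
 * the kernel's __packed.
 */
#include <stdint.h>

struct report_padded {
	uint8_t update;
	uint16_t value;		/* usually preceded by 1 byte of padding */
};

struct report_packed {
	uint8_t update;
	uint16_t value;
} __attribute__((packed));

_Static_assert(sizeof(struct report_packed) == 3,
	       "packed layout matches the 3-byte wire format");
/* sizeof(struct report_padded) is typically 4 on common ABIs. */

int main(void) { return 0; }
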
index 5978e9d..ebf15f3 100644 (file)
@@ -209,8 +209,7 @@ int vmbus_connect(void)
         * Setup the vmbus event connection for channel interrupt
         * abstraction stuff
         */
-       vmbus_connection.int_page =
-       (void *)hv_alloc_hyperv_zeroed_page();
+       vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page();
        if (vmbus_connection.int_page == NULL) {
                ret = -ENOMEM;
                goto cleanup;
@@ -225,8 +224,8 @@ int vmbus_connect(void)
         * Setup the monitor notification facility. The 1st page for
         * parent->child and the 2nd page for child->parent
         */
-       vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page();
-       vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page();
+       vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page();
+       vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page();
        if ((vmbus_connection.monitor_pages[0] == NULL) ||
            (vmbus_connection.monitor_pages[1] == NULL)) {
                ret = -ENOMEM;
@@ -333,15 +332,15 @@ void vmbus_disconnect(void)
                destroy_workqueue(vmbus_connection.work_queue);
 
        if (vmbus_connection.int_page) {
-               hv_free_hyperv_page((unsigned long)vmbus_connection.int_page);
+               hv_free_hyperv_page(vmbus_connection.int_page);
                vmbus_connection.int_page = NULL;
        }
 
        set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
        set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
 
-       hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
-       hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
+       hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+       hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
        vmbus_connection.monitor_pages[0] = NULL;
        vmbus_connection.monitor_pages[1] = NULL;
 }
index dffcc89..0d7a3ba 100644 (file)
@@ -1628,7 +1628,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
        WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
        WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order));
        local_irq_save(flags);
-       hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
+       hint = *this_cpu_ptr(hyperv_pcpu_input_arg);
        if (!hint) {
                local_irq_restore(flags);
                return -ENOSPC;
index 542a1d5..6a2258f 100644 (file)
@@ -115,12 +115,12 @@ void *hv_alloc_hyperv_zeroed_page(void)
 }
 EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
 
-void hv_free_hyperv_page(unsigned long addr)
+void hv_free_hyperv_page(void *addr)
 {
        if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
-               free_page(addr);
+               free_page((unsigned long)addr);
        else
-               kfree((void *)addr);
+               kfree(addr);
 }
 EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
 
@@ -253,7 +253,7 @@ static void hv_kmsg_dump_unregister(void)
        atomic_notifier_chain_unregister(&panic_notifier_list,
                                         &hyperv_panic_report_block);
 
-       hv_free_hyperv_page((unsigned long)hv_panic_page);
+       hv_free_hyperv_page(hv_panic_page);
        hv_panic_page = NULL;
 }
 
@@ -270,7 +270,7 @@ static void hv_kmsg_dump_register(void)
        ret = kmsg_dump_register(&hv_kmsg_dumper);
        if (ret) {
                pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
-               hv_free_hyperv_page((unsigned long)hv_panic_page);
+               hv_free_hyperv_page(hv_panic_page);
                hv_panic_page = NULL;
        }
 }
index a981f70..0238078 100644 (file)
 
 #include <linux/crc16.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/hid.h>
 #include <linux/hwmon.h>
 #include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/seq_file.h>
@@ -63,6 +65,8 @@ static const char *const aqc_device_names[] = {
 #define CTRL_REPORT_ID                 0x03
 #define AQUAERO_CTRL_REPORT_ID         0x0b
 
+#define CTRL_REPORT_DELAY              200     /* ms */
+
 /* The HID report that the official software always sends
  * after writing values, currently same for all devices
  */
@@ -527,6 +531,9 @@ struct aqc_data {
        int secondary_ctrl_report_size;
        u8 *secondary_ctrl_report;
 
+       ktime_t last_ctrl_report_op;
+       int ctrl_report_delay;  /* Delay between two ctrl report operations, in ms */
+
        int buffer_size;
        u8 *buffer;
        int checksum_start;
@@ -611,17 +618,35 @@ static int aqc_aquastreamxt_convert_fan_rpm(u16 val)
        return 0;
 }
 
+static void aqc_delay_ctrl_report(struct aqc_data *priv)
+{
+       /*
+        * If previous read or write is too close to this one, delay the current operation
+        * to give the device enough time to process the previous one.
+        */
+       if (priv->ctrl_report_delay) {
+               s64 delta = ktime_ms_delta(ktime_get(), priv->last_ctrl_report_op);
+
+               if (delta < priv->ctrl_report_delay)
+                       msleep(priv->ctrl_report_delay - delta);
+       }
+}
+
 /* Expects the mutex to be locked */
 static int aqc_get_ctrl_data(struct aqc_data *priv)
 {
        int ret;
 
+       aqc_delay_ctrl_report(priv);
+
        memset(priv->buffer, 0x00, priv->buffer_size);
        ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
                                 HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
        if (ret < 0)
                ret = -ENODATA;
 
+       priv->last_ctrl_report_op = ktime_get();
+
        return ret;
 }
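
/*
 * Illustration only (userspace analogue, not part of the patch): the
 * rate-limiting shape of aqc_delay_ctrl_report(). Before each control
 * report operation the driver measures the time since the previous one
 * and sleeps for the remainder of a fixed window, so two operations are
 * never closer together than the device can process them.
 */
#include <stdint.h>
#include <time.h>

#define CTRL_REPORT_DELAY_MS 200	/* same window the driver uses */

static int64_t last_op_ms;

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

static void delay_ctrl_report(void)
{
	int64_t delta = now_ms() - last_op_ms;

	if (delta < CTRL_REPORT_DELAY_MS) {
		struct timespec req = {
			.tv_nsec = (CTRL_REPORT_DELAY_MS - delta) * 1000000L,
		};
		nanosleep(&req, NULL);
	}
}

static void ctrl_report_op(void)
{
	delay_ctrl_report();
	/* ... perform the HID feature-report read or write here ... */
	last_op_ms = now_ms();	/* record the access, even on failure */
}

int main(void)
{
	ctrl_report_op();
	ctrl_report_op();	/* this one sleeps for most of the window */
	return 0;
}
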
 
@@ -631,6 +656,8 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
        int ret;
        u16 checksum;
 
+       aqc_delay_ctrl_report(priv);
+
        /* Checksum is not needed for Aquaero */
        if (priv->kind != aquaero) {
                /* Init and xorout value for CRC-16/USB is 0xffff */
@@ -646,12 +673,16 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
        ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
                                 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
        if (ret < 0)
-               return ret;
+               goto record_access_and_ret;
 
        /* The official software sends this report after every change, so do it here as well */
        ret = hid_hw_raw_request(priv->hdev, priv->secondary_ctrl_report_id,
                                 priv->secondary_ctrl_report, priv->secondary_ctrl_report_size,
                                 HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+
+record_access_and_ret:
+       priv->last_ctrl_report_op = ktime_get();
+
        return ret;
 }
 
@@ -1027,7 +1058,7 @@ static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
                        if (ret < 0)
                                return ret;
 
-                       *val = aqc_percent_to_pwm(ret);
+                       *val = aqc_percent_to_pwm(*val);
                        break;
                }
                break;
@@ -1524,6 +1555,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
 
                priv->buffer_size = AQUAERO_CTRL_REPORT_SIZE;
                priv->temp_ctrl_offset = AQUAERO_TEMP_CTRL_OFFSET;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->temp_label = label_temp_sensors;
                priv->virtual_temp_label = label_virtual_temp_sensors;
@@ -1547,6 +1579,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = D5NEXT_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = D5NEXT_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->power_cycle_count_offset = D5NEXT_POWER_CYCLES;
 
@@ -1597,6 +1630,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = OCTO_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = OCTO_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->power_cycle_count_offset = OCTO_POWER_CYCLES;
 
@@ -1624,6 +1658,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
                priv->temp_ctrl_offset = QUADRO_TEMP_CTRL_OFFSET;
 
                priv->buffer_size = QUADRO_CTRL_REPORT_SIZE;
+               priv->ctrl_report_delay = CTRL_REPORT_DELAY;
 
                priv->flow_pulses_ctrl_offset = QUADRO_FLOW_PULSES_CTRL_OFFSET;
                priv->power_cycle_count_offset = QUADRO_POWER_CYCLES;
index 7b177b9..bae0bec 100644 (file)
@@ -65,7 +65,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 #define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET    0xd8200c64
 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET    0xd8200ca4
 
-/* Common for Zen CPU families (Family 17h and 18h and 19h) */
+/* Common for Zen CPU families (Family 17h and 18h and 19h and 1Ah) */
 #define ZEN_REPORTED_TEMP_CTRL_BASE            0x00059800
 
 #define ZEN_CCD_TEMP(offset, x)                        (ZEN_REPORTED_TEMP_CTRL_BASE + \
@@ -77,6 +77,13 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
 #define ZEN_CUR_TEMP_RANGE_SEL_MASK            BIT(19)
 #define ZEN_CUR_TEMP_TJ_SEL_MASK               GENMASK(17, 16)
 
+/*
+ * AMD's Industrial processor 3255 supports temperature from -40 deg to 105 deg Celsius.
+ * Use the model name to identify 3255 CPUs and set a flag to display negative temperature.
+ * Do not round off to zero for negative Tctl or Tdie values if the flag is set
+ */
+#define AMD_I3255_STR                          "3255"
+
 struct k10temp_data {
        struct pci_dev *pdev;
        void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
@@ -86,6 +93,7 @@ struct k10temp_data {
        u32 show_temp;
        bool is_zen;
        u32 ccd_offset;
+       bool disp_negative;
 };
 
 #define TCTL_BIT       0
@@ -204,12 +212,12 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
                switch (channel) {
                case 0:         /* Tctl */
                        *val = get_raw_temp(data);
-                       if (*val < 0)
+                       if (*val < 0 && !data->disp_negative)
                                *val = 0;
                        break;
                case 1:         /* Tdie */
                        *val = get_raw_temp(data) - data->temp_offset;
-                       if (*val < 0)
+                       if (*val < 0 && !data->disp_negative)
                                *val = 0;
                        break;
                case 2 ... 13:          /* Tccd{1-12} */
@@ -405,6 +413,11 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        data->pdev = pdev;
        data->show_temp |= BIT(TCTL_BIT);       /* Always show Tctl */
 
+       if (boot_cpu_data.x86 == 0x17 &&
+           strstr(boot_cpu_data.x86_model_id, AMD_I3255_STR)) {
+               data->disp_negative = true;
+       }
+
        if (boot_cpu_data.x86 == 0x15 &&
            ((boot_cpu_data.x86_model & 0xf0) == 0x60 ||
             (boot_cpu_data.x86_model & 0xf0) == 0x70)) {
@@ -462,6 +475,10 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                        k10temp_get_ccd_support(pdev, data, 12);
                        break;
                }
+       } else if (boot_cpu_data.x86 == 0x1a) {
+               data->temp_adjust_mask = ZEN_CUR_TEMP_RANGE_SEL_MASK;
+               data->read_tempreg = read_tempreg_nb_zen;
+               data->is_zen = true;
        } else {
                data->read_htcreg = read_htcreg_pci;
                data->read_tempreg = read_tempreg_pci;
@@ -508,6 +525,8 @@ static const struct pci_device_id k10temp_id_table[] = {
        { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
        { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
        { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
+       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) },
+       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) },
        { PCI_VDEVICE(HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
        {}
 };
index 236dc97..08ce498 100644 (file)
@@ -955,14 +955,25 @@ static const u16 scale_in[15] = {
        800, 800
 };
 
-static inline long in_from_reg(u8 reg, u8 nr)
+/*
+ * NCT6798 scaling:
+ *    CPUVC, IN1, AVSB, 3VCC, IN0, IN8, IN4, 3VSB, VBAT,  VTT,  IN5,  IN6, IN2,
+ *      IN3, IN7
+ * Additional scales to be added later: IN9 (800), VHIF (1600)
+ */
+static const u16 scale_in_6798[15] = {
+       800, 800, 1600, 1600, 800, 800, 800, 1600, 1600, 1600, 1600, 1600, 800,
+       800, 800
+};
+
+static inline long in_from_reg(u8 reg, u8 nr, const u16 *scales)
 {
-       return DIV_ROUND_CLOSEST(reg * scale_in[nr], 100);
+       return DIV_ROUND_CLOSEST(reg * scales[nr], 100);
 }
 
-static inline u8 in_to_reg(u32 val, u8 nr)
+static inline u8 in_to_reg(u32 val, u8 nr, const u16 *scales)
 {
-       return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255);
+       return clamp_val(DIV_ROUND_CLOSEST(val * 100, scales[nr]), 0, 255);
 }
 
 /* TSI temperatures are in 8.3 format */
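
/*
 * Illustration only (not part of the patch): the register-to-millivolt
 * arithmetic above with sample numbers. A raw ADC byte of 0xC0 (192) on
 * a channel scaled at 1600 reads back as round(192 * 1600 / 100) =
 * 3072 mV, which is why 3VCC-class inputs need the larger per-chip
 * scale. The two-entry table below is hypothetical; the rounding macro
 * is simplified for non-negative values.
 */
#include <stdio.h>

#define DIV_ROUND_CLOSEST(x, d) (((x) + (d) / 2) / (d))

static long in_from_reg(unsigned char reg, const unsigned short *scales,
			unsigned char nr)
{
	return DIV_ROUND_CLOSEST((long)reg * scales[nr], 100);
}

int main(void)
{
	/* Hypothetical two-channel table: 800 for CPU rails, 1600 for 3VCC. */
	const unsigned short scales[] = { 800, 1600 };

	printf("%ld mV\n", in_from_reg(0xC0, scales, 0));	/* 1536 */
	printf("%ld mV\n", in_from_reg(0xC0, scales, 1));	/* 3072 */
	return 0;
}
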
@@ -1673,7 +1684,8 @@ show_in_reg(struct device *dev, struct device_attribute *attr, char *buf)
        if (IS_ERR(data))
                return PTR_ERR(data);
 
-       return sprintf(buf, "%ld\n", in_from_reg(data->in[nr][index], nr));
+       return sprintf(buf, "%ld\n",
+                      in_from_reg(data->in[nr][index], nr, data->scale_in));
 }
 
 static ssize_t
@@ -1691,7 +1703,7 @@ store_in_reg(struct device *dev, struct device_attribute *attr, const char *buf,
        if (err < 0)
                return err;
        mutex_lock(&data->update_lock);
-       data->in[nr][index] = in_to_reg(val, nr);
+       data->in[nr][index] = in_to_reg(val, nr, data->scale_in);
        err = nct6775_write_value(data, data->REG_IN_MINMAX[index - 1][nr], data->in[nr][index]);
        mutex_unlock(&data->update_lock);
        return err ? : count;
@@ -3462,6 +3474,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
        mutex_init(&data->update_lock);
        data->name = nct6775_device_names[data->kind];
        data->bank = 0xff;              /* Force initial bank selection */
+       data->scale_in = scale_in;
 
        switch (data->kind) {
        case nct6106:
@@ -3977,6 +3990,9 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
                        break;
                }
 
+               if (data->kind == nct6798 || data->kind == nct6799)
+                       data->scale_in = scale_in_6798;
+
                reg_temp = NCT6779_REG_TEMP;
                num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
                if (data->kind == nct6791) {
index ada867d..a409d7a 100644 (file)
@@ -586,7 +586,7 @@ nct6775_check_fan_inputs(struct nct6775_data *data, struct nct6775_sio_data *sio
                int creb;
                int cred;
 
-               cre6 = sio_data->sio_inb(sio_data, 0xe0);
+               cre6 = sio_data->sio_inb(sio_data, 0xe6);
 
                sio_data->sio_select(sio_data, NCT6775_LD_12);
                cre0 = sio_data->sio_inb(sio_data, 0xe0);
index 44f79c5..a84c6ce 100644 (file)
@@ -98,6 +98,7 @@ struct nct6775_data {
        u8 bank;                /* current register bank */
        u8 in_num;              /* number of in inputs we have */
        u8 in[15][3];           /* [0]=in, [1]=in_max, [2]=in_min */
+       const u16 *scale_in;    /* internal scaling factors */
        unsigned int rpm[NUM_FAN];
        u16 fan_min[NUM_FAN];
        u8 fan_pulses[NUM_FAN];
index 9339bfc..024cff1 100644 (file)
@@ -725,7 +725,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj,
        if (index >= 38 && index < 46 && !(reg & 0x01))         /* PECI 0 */
                return 0;
 
-       if (index >= 0x46 && (!(reg & 0x02)))                   /* PECI 1 */
+       if (index >= 46 && !(reg & 0x02))                       /* PECI 1 */
                return 0;
 
        return attr->mode;
index e1a907c..1e1cc67 100644 (file)
@@ -220,6 +220,20 @@ static int tt_toggle_disable(void)
 }
 
 /* Callbacks for turbo toggle attribute */
+static umode_t tt_toggle_is_visible(struct kobject *kobj,
+                                   struct attribute *attr, int n)
+{
+       switch (board) {
+       case aok_zoe_a1:
+       case oxp_mini_amd_a07:
+       case oxp_mini_amd_pro:
+               return attr->mode;
+       default:
+               break;
+       }
+       return 0;
+}
+
 static ssize_t tt_toggle_store(struct device *dev,
                               struct device_attribute *attr, const char *buf,
                               size_t count)
@@ -396,7 +410,15 @@ static struct attribute *oxp_ec_attrs[] = {
        NULL
 };
 
-ATTRIBUTE_GROUPS(oxp_ec);
+static struct attribute_group oxp_ec_attribute_group = {
+       .is_visible = tt_toggle_is_visible,
+       .attrs = oxp_ec_attrs,
+};
+
+static const struct attribute_group *oxp_ec_groups[] = {
+       &oxp_ec_attribute_group,
+       NULL
+};
 
 static const struct hwmon_ops oxp_ec_hwmon_ops = {
        .is_visible = oxp_ec_hwmon_is_visible,
@@ -415,7 +437,6 @@ static int oxp_platform_probe(struct platform_device *pdev)
        const struct dmi_system_id *dmi_entry;
        struct device *dev = &pdev->dev;
        struct device *hwdev;
-       int ret;
 
        /*
         * Have to check for AMD processor here because DMI strings are the
@@ -430,18 +451,6 @@ static int oxp_platform_probe(struct platform_device *pdev)
 
        board = (enum oxp_board)(unsigned long)dmi_entry->driver_data;
 
-       switch (board) {
-       case aok_zoe_a1:
-       case oxp_mini_amd_a07:
-       case oxp_mini_amd_pro:
-               ret = devm_device_add_groups(dev, oxp_ec_groups);
-               if (ret)
-                       return ret;
-               break;
-       default:
-               break;
-       }
-
        hwdev = devm_hwmon_device_register_with_info(dev, "oxpec", NULL,
                                                     &oxp_ec_chip_info, NULL);
 
@@ -451,6 +460,7 @@ static int oxp_platform_probe(struct platform_device *pdev)
 static struct platform_driver oxp_platform_driver = {
        .driver = {
                .name = "oxp-platform",
+               .dev_groups = oxp_ec_groups,
        },
        .probe = oxp_platform_probe,
 };
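
/*
 * Illustration only (kernel-style sketch, not part of the patch): the
 * pattern the hunks above move to. Instead of conditionally adding
 * attribute groups in probe(), the groups are attached unconditionally
 * via the driver's .dev_groups and an .is_visible callback hides them
 * on unsupported boards, which also guarantees removal on unbind. All
 * names here are invented.
 */
#include <linux/device.h>
#include <linux/sysfs.h>

static bool board_supported;	/* stand-in for the board enum check */

static umode_t example_is_visible(struct kobject *kobj,
				  struct attribute *attr, int n)
{
	return board_supported ? attr->mode : 0;
}

static struct attribute *example_attrs[] = {
	/* &dev_attr_foo.attr, ... */
	NULL
};

static const struct attribute_group example_group = {
	.is_visible = example_is_visible,
	.attrs = example_attrs,
};

static const struct attribute_group *example_groups[] = {
	&example_group,
	NULL
};
/* Wired up as: .driver = { .name = "...", .dev_groups = example_groups } */
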
index fa5070a..7c5f4b1 100644 (file)
 enum chips {pfe1100, pfe3000};
 
 /*
- * Disable status check for pfe3000 devices, because some devices report
- * communication error (invalid command) for VOUT_MODE command (0x20)
- * although correct VOUT_MODE (0x16) is returned: it leads to incorrect
- * exponent in linear mode.
+ * Disable status check because some devices report communication error
+ * (invalid command) for VOUT_MODE command (0x20) although the correct
+ * VOUT_MODE (0x16) is returned: it leads to incorrect exponent in linear
+ * mode.
+ * This affects both pfe3000 and pfe1100.
  */
-static struct pmbus_platform_data pfe3000_plat_data = {
+static struct pmbus_platform_data pfe_plat_data = {
        .flags = PMBUS_SKIP_STATUS_CHECK,
 };
 
@@ -94,16 +95,15 @@ static int pfe_pmbus_probe(struct i2c_client *client)
        int model;
 
        model = (int)i2c_match_id(pfe_device_id, client)->driver_data;
+       client->dev.platform_data = &pfe_plat_data;
 
        /*
         * PFE3000-12-069RA devices may not stay in page 0 during device
         * probe which leads to probe failure (read status word failed).
         * So let's set the device to page 0 at the beginning.
         */
-       if (model == pfe3000) {
-               client->dev.platform_data = &pfe3000_plat_data;
+       if (model == pfe3000)
                i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0);
-       }
 
        return pmbus_do_probe(client, &pfe_driver_info[model]);
 }
index fa06325..69a4e62 100644 (file)
@@ -2745,9 +2745,8 @@ static const struct pmbus_status_category __maybe_unused pmbus_status_flag_map[]
        },
 };
 
-static int _pmbus_is_enabled(struct device *dev, u8 page)
+static int _pmbus_is_enabled(struct i2c_client *client, u8 page)
 {
-       struct i2c_client *client = to_i2c_client(dev->parent);
        int ret;
 
        ret = _pmbus_read_byte_data(client, page, PMBUS_OPERATION);
@@ -2758,17 +2757,16 @@ static int _pmbus_is_enabled(struct device *dev, u8 page)
        return !!(ret & PB_OPERATION_CONTROL_ON);
 }
 
-static int __maybe_unused pmbus_is_enabled(struct device *dev, u8 page)
+static int __maybe_unused pmbus_is_enabled(struct i2c_client *client, u8 page)
 {
-       struct i2c_client *client = to_i2c_client(dev->parent);
        struct pmbus_data *data = i2c_get_clientdata(client);
        int ret;
 
        mutex_lock(&data->update_lock);
-       ret = _pmbus_is_enabled(dev, page);
+       ret = _pmbus_is_enabled(client, page);
        mutex_unlock(&data->update_lock);
 
-       return !!(ret & PB_OPERATION_CONTROL_ON);
+       return ret;
 }
 
 #define to_dev_attr(_dev_attr) \
@@ -2844,7 +2842,7 @@ static int _pmbus_get_flags(struct pmbus_data *data, u8 page, unsigned int *flag
        if (status < 0)
                return status;
 
-       if (_pmbus_is_enabled(dev, page)) {
+       if (_pmbus_is_enabled(client, page)) {
                if (status & PB_STATUS_OFF) {
                        *flags |= REGULATOR_ERROR_FAIL;
                        *event |= REGULATOR_EVENT_FAIL;
@@ -2898,7 +2896,10 @@ static int __maybe_unused pmbus_get_flags(struct pmbus_data *data, u8 page, unsi
 #if IS_ENABLED(CONFIG_REGULATOR)
 static int pmbus_regulator_is_enabled(struct regulator_dev *rdev)
 {
-       return pmbus_is_enabled(rdev_get_dev(rdev), rdev_get_id(rdev));
+       struct device *dev = rdev_get_dev(rdev);
+       struct i2c_client *client = to_i2c_client(dev->parent);
+
+       return pmbus_is_enabled(client, rdev_get_id(rdev));
 }
 
 static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable)
@@ -2945,6 +2946,7 @@ static int pmbus_regulator_get_status(struct regulator_dev *rdev)
        struct pmbus_data *data = i2c_get_clientdata(client);
        u8 page = rdev_get_id(rdev);
        int status, ret;
+       int event;
 
        mutex_lock(&data->update_lock);
        status = pmbus_get_status(client, page, PMBUS_STATUS_WORD);
@@ -2964,7 +2966,7 @@ static int pmbus_regulator_get_status(struct regulator_dev *rdev)
                goto unlock;
        }
 
-       ret = pmbus_regulator_get_error_flags(rdev, &status);
+       ret = _pmbus_get_flags(data, rdev_get_id(rdev), &status, &event, false);
        if (ret)
                goto unlock;
 
index 2d8342f..05c8068 100644 (file)
@@ -233,13 +233,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
                                   u32 offset)
 {
        u32 val;
+       unsigned long flags;
 
        if (iproc_i2c->idm_base) {
-               spin_lock(&iproc_i2c->idm_lock);
+               spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
                writel(iproc_i2c->ape_addr_mask,
                       iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
                val = readl(iproc_i2c->base + offset);
-               spin_unlock(&iproc_i2c->idm_lock);
+               spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
        } else {
                val = readl(iproc_i2c->base + offset);
        }
@@ -250,12 +251,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
 static inline void iproc_i2c_wr_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
                                    u32 offset, u32 val)
 {
+       unsigned long flags;
+
        if (iproc_i2c->idm_base) {
-               spin_lock(&iproc_i2c->idm_lock);
+               spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
                writel(iproc_i2c->ape_addr_mask,
                       iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
                writel(val, iproc_i2c->base + offset);
-               spin_unlock(&iproc_i2c->idm_lock);
+               spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
        } else {
                writel(val, iproc_i2c->base + offset);
        }
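
/*
 * Illustration only (kernel-style sketch, not part of the patch): the
 * shape of the fix above. A plain spin_lock() can deadlock if the same
 * lock is also taken from an interrupt handler on the same CPU;
 * spin_lock_irqsave() masks local interrupts for the critical section
 * and restores the previous interrupt state afterwards.
 */
#include <linux/io.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_idm_lock);

static u32 guarded_read(void __iomem *base, u32 offset)
{
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&example_idm_lock, flags);	/* also masks local IRQs */
	val = readl(base + offset);
	spin_unlock_irqrestore(&example_idm_lock, flags);

	return val;
}
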
index 3bfd7a2..24bef00 100644 (file)
@@ -588,9 +588,21 @@ i2c_dw_read(struct dw_i2c_dev *dev)
                        u32 flags = msgs[dev->msg_read_idx].flags;
 
                        regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
+                       tmp &= DW_IC_DATA_CMD_DAT;
                        /* Ensure length byte is a valid value */
-                       if (flags & I2C_M_RECV_LEN &&
-                           (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+                       if (flags & I2C_M_RECV_LEN) {
+                               /*
+                                * if IC_EMPTYFIFO_HOLD_MASTER_EN is set, which cannot be
+                                * detected from the registers, the controller can be
+                                * disabled if the STOP bit is set. But it is only set
+                                * after receiving block data response length in
+                                * I2C_FUNC_SMBUS_BLOCK_DATA case. That needs to read
+                                * another byte with STOP bit set when the block data
+                                * response length is invalid to complete the transaction.
+                                */
+                               if (!tmp || tmp > I2C_SMBUS_BLOCK_MAX)
+                                       tmp = 1;
+
                                len = i2c_dw_recv_len(dev, tmp);
                        }
                        *buf++ = tmp;
index e067671..0980c77 100644 (file)
@@ -330,6 +330,14 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context)
        struct hisi_i2c_controller *ctlr = context;
        u32 int_stat;
 
+       /*
+        * Don't handle the interrupt if cltr->completion is NULL. We may
+        * reach here because the interrupt is spurious or the transfer is
+        * started by another port (e.g. firmware) rather than us.
+        */
+       if (!ctlr->completion)
+               return IRQ_NONE;
+
        int_stat = readl(ctlr->iobase + HISI_I2C_INT_MSTAT);
        hisi_i2c_clear_int(ctlr, int_stat);
        if (!(int_stat & HISI_I2C_INT_ALL))
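
/*
 * Illustration only (kernel-style sketch, not part of the patch): the
 * shared-interrupt etiquette applied above. A handler that cannot be
 * sure the interrupt belongs to a transfer it started returns IRQ_NONE,
 * letting the core's spurious-IRQ accounting kick in instead of
 * dereferencing state that was never set up. Names are invented.
 */
#include <linux/completion.h>
#include <linux/interrupt.h>
#include <linux/io.h>

struct example_ctlr {
	struct completion *completion;	/* set only while we own a transfer */
	void __iomem *iobase;
};

static irqreturn_t example_irq(int irq, void *context)
{
	struct example_ctlr *ctlr = context;

	if (!ctlr->completion)		/* spurious or firmware-initiated */
		return IRQ_NONE;

	/* ... read and clear the status, then complete the transfer ... */
	return IRQ_HANDLED;
}
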
index c3287c8..150d923 100644 (file)
@@ -209,6 +209,9 @@ static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
        lpi2c_imx_set_mode(lpi2c_imx);
 
        clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
+       if (!clk_rate)
+               return -EINVAL;
+
        if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
                filt = 0;
        else
index ad8270c..fa6020d 100644 (file)
@@ -250,7 +250,8 @@ static int p2wi_probe(struct platform_device *pdev)
 
        p2wi->rstc = devm_reset_control_get_exclusive(dev, NULL);
        if (IS_ERR(p2wi->rstc)) {
-               dev_err(dev, "failed to retrieve reset controller: %d\n", ret);
+               dev_err(dev, "failed to retrieve reset controller: %pe\n",
+                       p2wi->rstc);
                return PTR_ERR(p2wi->rstc);
        }
 
index bcbbf23..03fc10b 100644 (file)
@@ -442,7 +442,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
        if (IS_VI(i2c_dev))
                return 0;
 
-       if (!i2c_dev->hw->has_apb_dma) {
+       if (i2c_dev->hw->has_apb_dma) {
                if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) {
                        dev_dbg(i2c_dev->dev, "APB DMA support not enabled\n");
                        return 0;
@@ -460,6 +460,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
        i2c_dev->dma_chan = dma_request_chan(i2c_dev->dev, "tx");
        if (IS_ERR(i2c_dev->dma_chan)) {
                err = PTR_ERR(i2c_dev->dma_chan);
+               i2c_dev->dma_chan = NULL;
                goto err_out;
        }
 
index b930036..ea5a6a1 100644 (file)
@@ -199,43 +199,6 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
        return __intel_idle(dev, drv, index);
 }
 
-static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
-                                       struct cpuidle_driver *drv, int index)
-{
-       raw_safe_halt();
-       raw_local_irq_disable();
-       return index;
-}
-
-/**
- * intel_idle_hlt - Ask the processor to enter the given idle state using hlt.
- * @dev: cpuidle device of the target CPU.
- * @drv: cpuidle driver (assumed to point to intel_idle_driver).
- * @index: Target idle state index.
- *
- * Use the HLT instruction to notify the processor that the CPU represented by
- * @dev is idle and it can try to enter the idle state corresponding to @index.
- *
- * Must be called under local_irq_disable().
- */
-static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
-                               struct cpuidle_driver *drv, int index)
-{
-       return __intel_idle_hlt(dev, drv, index);
-}
-
-static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
-                                   struct cpuidle_driver *drv, int index)
-{
-       int ret;
-
-       raw_local_irq_enable();
-       ret = __intel_idle_hlt(dev, drv, index);
-       raw_local_irq_disable();
-
-       return ret;
-}
-
 /**
  * intel_idle_s2idle - Ask the processor to enter the given idle state.
  * @dev: cpuidle device of the target CPU.
@@ -960,7 +923,7 @@ static struct cpuidle_state adl_l_cstates[] __initdata = {
                .enter = NULL }
 };
 
-static struct cpuidle_state adl_n_cstates[] __initdata = {
+static struct cpuidle_state gmt_cstates[] __initdata = {
        {
                .name = "C1",
                .desc = "MWAIT 0x00",
@@ -1279,25 +1242,6 @@ static struct cpuidle_state snr_cstates[] __initdata = {
                .enter = NULL }
 };
 
-static struct cpuidle_state vmguest_cstates[] __initdata = {
-       {
-               .name = "C1",
-               .desc = "HLT",
-               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
-               .exit_latency = 5,
-               .target_residency = 10,
-               .enter = &intel_idle_hlt, },
-       {
-               .name = "C1L",
-               .desc = "Long HLT",
-               .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
-               .exit_latency = 5,
-               .target_residency = 200,
-               .enter = &intel_idle_hlt, },
-       {
-               .enter = NULL }
-};
-
 static const struct idle_cpu idle_cpu_nehalem __initconst = {
        .state_table = nehalem_cstates,
        .auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1405,8 +1349,8 @@ static const struct idle_cpu idle_cpu_adl_l __initconst = {
        .state_table = adl_l_cstates,
 };
 
-static const struct idle_cpu idle_cpu_adl_n __initconst = {
-       .state_table = adl_n_cstates,
+static const struct idle_cpu idle_cpu_gmt __initconst = {
+       .state_table = gmt_cstates,
 };
 
 static const struct idle_cpu idle_cpu_spr __initconst = {
@@ -1479,7 +1423,7 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           &idle_cpu_icx),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &idle_cpu_adl),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &idle_cpu_adl_l),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &idle_cpu_adl_n),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      &idle_cpu_gmt),
        X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,    &idle_cpu_spr),
        X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X,     &idle_cpu_spr),
        X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,        &idle_cpu_knl),
@@ -1897,16 +1841,6 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
 
 static void state_update_enter_method(struct cpuidle_state *state, int cstate)
 {
-       if (state->enter == intel_idle_hlt) {
-               if (force_irq_on) {
-                       pr_info("forced intel_idle_irq for state %d\n", cstate);
-                       state->enter = intel_idle_hlt_irq_on;
-               }
-               return;
-       }
-       if (state->enter == intel_idle_hlt_irq_on)
-               return; /* no update scenarios */
-
        if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
                /*
                 * Combining with XSTATE with IBRS or IRQ_ENABLE flags
@@ -1940,21 +1874,6 @@ static void state_update_enter_method(struct cpuidle_state *state, int cstate)
        }
 }
 
-/*
- * For mwait based states, we want to verify the cpuid data to see if the state
- * is actually supported by this specific CPU.
- * For non-mwait based states, this check should be skipped.
- */
-static bool should_verify_mwait(struct cpuidle_state *state)
-{
-       if (state->enter == intel_idle_hlt)
-               return false;
-       if (state->enter == intel_idle_hlt_irq_on)
-               return false;
-
-       return true;
-}
-
 static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
 {
        int cstate;
@@ -1979,7 +1898,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
                break;
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
-       case INTEL_FAM6_ALDERLAKE_N:
+       case INTEL_FAM6_ATOM_GRACEMONT:
                adl_idle_state_table_update();
                break;
        }
@@ -2003,7 +1922,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
                }
 
                mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
-               if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint))
+               if (!intel_idle_verify_cstate(mwait_hint))
                        continue;
 
                /* Structure copy. */
@@ -2137,93 +2056,6 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
                cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
 }
 
-/*
- * Match up the latency and break-even point of the bare-metal (CPU-based)
- * states with the deepest available VM state.
- *
- * We only want to do this for the deepest states, the ones that have
- * the TLB_FLUSHED flag set.
- *
- * All our short idle states are dominated by vmexit/vmenter latencies,
- * not the underlying hardware latencies, so we keep our values for these.
- */
-static void __init matchup_vm_state_with_baremetal(void)
-{
-       int cstate;
-
-       for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
-               int matching_cstate;
-
-               if (intel_idle_max_cstate_reached(cstate))
-                       break;
-
-               if (!cpuidle_state_table[cstate].enter)
-                       break;
-
-               if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
-                       continue;
-
-               for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
-                       if (!icpu->state_table[matching_cstate].enter)
-                               break;
-                       if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
-                               cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
-                               cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
-                       }
-               }
-
-       }
-}
-
-
-static int __init intel_idle_vminit(const struct x86_cpu_id *id)
-{
-       int retval;
-
-       cpuidle_state_table = vmguest_cstates;
-
-       icpu = (const struct idle_cpu *)id->driver_data;
-
-       pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
-                boot_cpu_data.x86_model);
-
-       intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
-       if (!intel_idle_cpuidle_devices)
-               return -ENOMEM;
-
-       /*
-        * We don't know exactly what the host will do when we go idle, but as a worst-case
-        * estimate we can assume that the exit latency of the deepest host state will be hit
-        * for our deep (long-duration) guest idle state.
-        * The same logic applies to the break-even point for the long-duration guest idle state.
-        * So let's copy these two properties from the table we found for the host CPU type.
-        */
-       matchup_vm_state_with_baremetal();
-
-       intel_idle_cpuidle_driver_init(&intel_idle_driver);
-
-       retval = cpuidle_register_driver(&intel_idle_driver);
-       if (retval) {
-               struct cpuidle_driver *drv = cpuidle_get_driver();
-               printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
-                      drv ? drv->name : "none");
-               goto init_driver_fail;
-       }
-
-       retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
-                                  intel_idle_cpu_online, NULL);
-       if (retval < 0)
-               goto hp_setup_fail;
-
-       return 0;
-hp_setup_fail:
-       intel_idle_cpuidle_devices_uninit();
-       cpuidle_unregister_driver(&intel_idle_driver);
-init_driver_fail:
-       free_percpu(intel_idle_cpuidle_devices);
-       return retval;
-}
-
 static int __init intel_idle_init(void)
 {
        const struct x86_cpu_id *id;
@@ -2242,8 +2074,6 @@ static int __init intel_idle_init(void)
        id = x86_match_cpu(intel_idle_ids);
        if (id) {
                if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
-                       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
-                               return intel_idle_vminit(id);
                        pr_debug("Please enable MWAIT in BIOS SETUP\n");
                        return -ENODEV;
                }
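
The intel_idle hunks above do two independent things: they back out the virtual-guest HLT idle states (vmguest_cstates and the helpers around it), and they finish the ALDERLAKE_N to ATOM_GRACEMONT rename under which the driver keys its per-SoC C-state tables. A minimal sketch of how those tables are dispatched, with illustrative example_* names rather than the driver's real ones:

static struct cpuidle_state example_cstates[] __initdata = {
        { .name = "C1", .desc = "MWAIT 0x00",
          .flags = MWAIT2flg(0x00), .exit_latency = 1,
          .target_residency = 1, .enter = &intel_idle },
        { .enter = NULL }                       /* table terminator */
};

static const struct idle_cpu idle_cpu_example __initconst = {
        .state_table = example_cstates,
};

static const struct x86_cpu_id example_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &idle_cpu_example),
        {}
};
/* x86_match_cpu(example_ids)->driver_data then selects the idle_cpu entry */
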
index 8685e0b..7bc3ebf 100644 (file)
@@ -62,7 +62,6 @@
 #define AD7192_MODE_STA_MASK   BIT(20) /* Status Register transmission Mask */
 #define AD7192_MODE_CLKSRC(x)  (((x) & 0x3) << 18) /* Clock Source Select */
 #define AD7192_MODE_SINC3      BIT(15) /* SINC3 Filter Select */
-#define AD7192_MODE_ACX                BIT(14) /* AC excitation enable(AD7195 only)*/
 #define AD7192_MODE_ENPAR      BIT(13) /* Parity Enable */
 #define AD7192_MODE_CLKDIV     BIT(12) /* Clock divide by 2 (AD7190/2 only)*/
 #define AD7192_MODE_SCYCLE     BIT(11) /* Single cycle conversion */
@@ -91,6 +90,7 @@
 /* Configuration Register Bit Designations (AD7192_REG_CONF) */
 
 #define AD7192_CONF_CHOP       BIT(23) /* CHOP enable */
+#define AD7192_CONF_ACX                BIT(22) /* AC excitation enable (AD7195 only) */
 #define AD7192_CONF_REFSEL     BIT(20) /* REFIN1/REFIN2 Reference Select */
 #define AD7192_CONF_CHAN(x)    ((x) << 8) /* Channel select */
 #define AD7192_CONF_CHAN_MASK  (0x7FF << 8) /* Channel select mask */
@@ -472,7 +472,7 @@ static ssize_t ad7192_show_ac_excitation(struct device *dev,
        struct iio_dev *indio_dev = dev_to_iio_dev(dev);
        struct ad7192_state *st = iio_priv(indio_dev);
 
-       return sysfs_emit(buf, "%d\n", !!(st->mode & AD7192_MODE_ACX));
+       return sysfs_emit(buf, "%d\n", !!(st->conf & AD7192_CONF_ACX));
 }
 
 static ssize_t ad7192_show_bridge_switch(struct device *dev,
@@ -513,13 +513,13 @@ static ssize_t ad7192_set(struct device *dev,
 
                ad_sd_write_reg(&st->sd, AD7192_REG_GPOCON, 1, st->gpocon);
                break;
-       case AD7192_REG_MODE:
+       case AD7192_REG_CONF:
                if (val)
-                       st->mode |= AD7192_MODE_ACX;
+                       st->conf |= AD7192_CONF_ACX;
                else
-                       st->mode &= ~AD7192_MODE_ACX;
+                       st->conf &= ~AD7192_CONF_ACX;
 
-               ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode);
+               ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, st->conf);
                break;
        default:
                ret = -EINVAL;
@@ -579,12 +579,11 @@ static IIO_DEVICE_ATTR(bridge_switch_en, 0644,
 
 static IIO_DEVICE_ATTR(ac_excitation_en, 0644,
                       ad7192_show_ac_excitation, ad7192_set,
-                      AD7192_REG_MODE);
+                      AD7192_REG_CONF);
 
 static struct attribute *ad7192_attributes[] = {
        &iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr,
        &iio_dev_attr_bridge_switch_en.dev_attr.attr,
-       &iio_dev_attr_ac_excitation_en.dev_attr.attr,
        NULL
 };
 
@@ -595,6 +594,7 @@ static const struct attribute_group ad7192_attribute_group = {
 static struct attribute *ad7195_attributes[] = {
        &iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr,
        &iio_dev_attr_bridge_switch_en.dev_attr.attr,
+       &iio_dev_attr_ac_excitation_en.dev_attr.attr,
        NULL
 };
 
index 213526c..aea83f3 100644 (file)
@@ -124,6 +124,7 @@ static const struct regmap_config ina2xx_regmap_config = {
 enum ina2xx_ids { ina219, ina226 };
 
 struct ina2xx_config {
+       const char *name;
        u16 config_default;
        int calibration_value;
        int shunt_voltage_lsb;  /* nV */
@@ -155,6 +156,7 @@ struct ina2xx_chip_info {
 
 static const struct ina2xx_config ina2xx_config[] = {
        [ina219] = {
+               .name = "ina219",
                .config_default = INA219_CONFIG_DEFAULT,
                .calibration_value = 4096,
                .shunt_voltage_lsb = 10000,
@@ -164,6 +166,7 @@ static const struct ina2xx_config ina2xx_config[] = {
                .chip_id = ina219,
        },
        [ina226] = {
+               .name = "ina226",
                .config_default = INA226_CONFIG_DEFAULT,
                .calibration_value = 2048,
                .shunt_voltage_lsb = 2500,
@@ -996,7 +999,7 @@ static int ina2xx_probe(struct i2c_client *client)
        /* Patch the current config register with default. */
        val = chip->config->config_default;
 
-       if (id->driver_data == ina226) {
+       if (type == ina226) {
                ina226_set_average(chip, INA226_DEFAULT_AVG, &val);
                ina226_set_int_time_vbus(chip, INA226_DEFAULT_IT, &val);
                ina226_set_int_time_vshunt(chip, INA226_DEFAULT_IT, &val);
@@ -1015,7 +1018,7 @@ static int ina2xx_probe(struct i2c_client *client)
        }
 
        indio_dev->modes = INDIO_DIRECT_MODE;
-       if (id->driver_data == ina226) {
+       if (type == ina226) {
                indio_dev->channels = ina226_channels;
                indio_dev->num_channels = ARRAY_SIZE(ina226_channels);
                indio_dev->info = &ina226_info;
@@ -1024,7 +1027,7 @@ static int ina2xx_probe(struct i2c_client *client)
                indio_dev->num_channels = ARRAY_SIZE(ina219_channels);
                indio_dev->info = &ina219_info;
        }
-       indio_dev->name = id->name;
+       indio_dev->name = id ? id->name : chip->config->name;
 
        ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev,
                                          &ina2xx_setup_ops);
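
The ina2xx hunks above harden probe for the firmware-matched case: when the driver binds through a DT/ACPI match instead of an i2c_device_id table, id is NULL and the old id->name dereference would oops, so a name field is added to the config table as a fallback. A minimal sketch of the pattern, with hypothetical my_config/my_configs standing in for the driver's table:

static int my_probe(struct i2c_client *client)
{
        const struct i2c_device_id *id = i2c_client_get_device_id(client);
        const struct my_config *cfg;

        if (id)                         /* matched via an i2c_device_id entry */
                cfg = &my_configs[id->driver_data];
        else                            /* matched via DT/ACPI */
                cfg = device_get_match_data(&client->dev);
        if (!cfg)
                return -ENODEV;

        /* never touch id->name unless id is known to be non-NULL */
        dev_info(&client->dev, "probed as %s\n", id ? id->name : cfg->name);
        return 0;
}
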
index af6bfcc..eb78a6f 100644 (file)
@@ -916,12 +916,6 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
                goto err_vref;
        }
 
-       ret = clk_prepare_enable(priv->core_clk);
-       if (ret) {
-               dev_err(dev, "failed to enable core clk\n");
-               goto err_core_clk;
-       }
-
        regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1);
        regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0,
                           MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval);
@@ -948,8 +942,6 @@ err_adc_clk:
        regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
                           MESON_SAR_ADC_REG3_ADC_EN, 0);
        meson_sar_adc_set_bandgap(indio_dev, false);
-       clk_disable_unprepare(priv->core_clk);
-err_core_clk:
        regulator_disable(priv->vref);
 err_vref:
        meson_sar_adc_unlock(indio_dev);
@@ -977,8 +969,6 @@ static void meson_sar_adc_hw_disable(struct iio_dev *indio_dev)
 
        meson_sar_adc_set_bandgap(indio_dev, false);
 
-       clk_disable_unprepare(priv->core_clk);
-
        regulator_disable(priv->vref);
 
        if (!ret)
@@ -1211,7 +1201,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
        if (IS_ERR(priv->clkin))
                return dev_err_probe(dev, PTR_ERR(priv->clkin), "failed to get clkin\n");
 
-       priv->core_clk = devm_clk_get(dev, "core");
+       priv->core_clk = devm_clk_get_enabled(dev, "core");
        if (IS_ERR(priv->core_clk))
                return dev_err_probe(dev, PTR_ERR(priv->core_clk), "failed to get core clk\n");
 
@@ -1294,15 +1284,26 @@ static int meson_sar_adc_remove(struct platform_device *pdev)
 static int meson_sar_adc_suspend(struct device *dev)
 {
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
+       struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
 
        meson_sar_adc_hw_disable(indio_dev);
 
+       clk_disable_unprepare(priv->core_clk);
+
        return 0;
 }
 
 static int meson_sar_adc_resume(struct device *dev)
 {
        struct iio_dev *indio_dev = dev_get_drvdata(dev);
+       struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
+       int ret;
+
+       ret = clk_prepare_enable(priv->core_clk);
+       if (ret) {
+               dev_err(dev, "failed to enable core clk\n");
+               return ret;
+       }
 
        return meson_sar_adc_hw_enable(indio_dev);
 }
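
The meson_saradc change swaps a hand-managed core clock for devm_clk_get_enabled(), which gets, prepares, and enables the clock in one call and registers a devres action that disables and unprepares it on driver detach; that is why the explicit enable/disable calls vanish from the hw_enable/hw_disable paths. System suspend/resume must still gate the clock by hand, as the new PM hunks do. A minimal sketch of the idiom (my_priv/my_probe are illustrative):

static int my_probe(struct platform_device *pdev)
{
        struct my_priv *priv;

        priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;

        /* get + prepare + enable, undone automatically on detach */
        priv->clk = devm_clk_get_enabled(&pdev->dev, "core");
        if (IS_ERR(priv->clk))
                return dev_err_probe(&pdev->dev, PTR_ERR(priv->clk),
                                     "failed to get core clk\n");

        platform_set_drvdata(pdev, priv);
        return 0;
}

static int my_suspend(struct device *dev)
{
        struct my_priv *priv = dev_get_drvdata(dev);

        clk_disable_unprepare(priv->clk);       /* gate while suspended */
        return 0;
}

static int my_resume(struct device *dev)
{
        struct my_priv *priv = dev_get_drvdata(dev);

        return clk_prepare_enable(priv->clk);   /* re-enable on resume */
}
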
index 943e9e1..b72d39f 100644 (file)
@@ -253,7 +253,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
        platform_set_drvdata(pdev, indio_dev);
 
        state->ec = ec->ec_dev;
-       state->msg = devm_kzalloc(&pdev->dev,
+       state->msg = devm_kzalloc(&pdev->dev, sizeof(*state->msg) +
                                max((u16)sizeof(struct ec_params_motion_sense),
                                state->ec->max_response), GFP_KERNEL);
        if (!state->msg)
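
The cros_ec fix above is a classic undersized allocation: state->msg points at a command header followed by a variable payload, but the old call sized the buffer for the payload alone, so header accesses ran past the allocation. A minimal sketch of the header-plus-flexible-array pattern, with illustrative names; struct_size() from <linux/overflow.h> is the usual overflow-safe way to compute it:

struct my_msg {
        u32 command;            /* fixed header fields */
        u8 data[];              /* flexible-array payload */
};

/* size the allocation for header + payload, not the payload alone */
msg = devm_kzalloc(dev, struct_size(msg, data, payload_len), GFP_KERNEL);
if (!msg)
        return -ENOMEM;
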
index 9bf8337..8c8e0bb 100644 (file)
@@ -344,9 +344,12 @@ static int admv1013_update_quad_filters(struct admv1013_state *st)
 
 static int admv1013_update_mixer_vgate(struct admv1013_state *st)
 {
-       unsigned int vcm, mixer_vgate;
+       unsigned int mixer_vgate;
+       int vcm;
 
        vcm = regulator_get_voltage(st->reg);
+       if (vcm < 0)
+               return vcm;
 
        if (vcm < 1800000)
                mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100;
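
The admv1013 hunk above fixes two related problems: regulator_get_voltage() returns a negative errno on failure, which the old unsigned int silently turned into a huge positive "voltage", and the error was never checked at all. Keeping the result signed and bailing out early handles both; a minimal sketch:

int uv;

uv = regulator_get_voltage(reg);        /* microvolts, or -errno */
if (uv < 0)
        return uv;                      /* propagate the error */
/* only now is uv a voltage that can be compared against thresholds */
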
index 6a18b36..b6e6b1d 100644 (file)
@@ -2687,7 +2687,7 @@ unknown_format:
 static int lsm6dsx_get_acpi_mount_matrix(struct device *dev,
                                          struct iio_mount_matrix *orientation)
 {
-       return false;
+       return -EOPNOTSUPP;
 }
 
 #endif
index c117f50..adcba83 100644 (file)
@@ -1888,7 +1888,7 @@ static const struct iio_buffer_setup_ops noop_ring_setup_ops;
 int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
 {
        struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
-       struct fwnode_handle *fwnode;
+       struct fwnode_handle *fwnode = NULL;
        int ret;
 
        if (!indio_dev->info)
@@ -1899,7 +1899,8 @@ int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
        /* If the calling driver did not initialize firmware node, do it here */
        if (dev_fwnode(&indio_dev->dev))
                fwnode = dev_fwnode(&indio_dev->dev);
-       else
+       /* The default dummy IIO device has no parent */
+       else if (indio_dev->dev.parent)
                fwnode = dev_fwnode(indio_dev->dev.parent);
        device_set_node(&indio_dev->dev, fwnode);
 
index 489902b..b50bf89 100644 (file)
@@ -190,7 +190,7 @@ static const struct iio_itime_sel_mul bu27008_itimes[] = {
        .address = BU27008_REG_##data##_LO,                                     \
        .scan_index = BU27008_##color,                                          \
        .scan_type = {                                                          \
-               .sign = 's',                                                    \
+               .sign = 'u',                                                    \
                .realbits = 16,                                                 \
                .storagebits = 16,                                              \
                .endianness = IIO_LE,                                           \
@@ -633,7 +633,7 @@ static int bu27008_try_find_new_time_gain(struct bu27008_data *data, int val,
        for (i = 0; i < data->gts.num_itime; i++) {
                new_time_sel = data->gts.itime_table[i].sel;
                ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts,
-                                       new_time_sel, val, val2 * 1000, gain_sel);
+                                       new_time_sel, val, val2, gain_sel);
                if (!ret)
                        break;
        }
@@ -662,7 +662,7 @@ static int bu27008_set_scale(struct bu27008_data *data,
                goto unlock_out;
 
        ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel,
-                                               val, val2 * 1000, &gain_sel);
+                                               val, val2, &gain_sel);
        if (ret) {
                ret = bu27008_try_find_new_time_gain(data, val, val2, &gain_sel);
                if (ret)
@@ -677,6 +677,21 @@ unlock_out:
        return ret;
 }
 
+static int bu27008_write_raw_get_fmt(struct iio_dev *indio_dev,
+                                    struct iio_chan_spec const *chan,
+                                    long mask)
+{
+
+       switch (mask) {
+       case IIO_CHAN_INFO_SCALE:
+               return IIO_VAL_INT_PLUS_NANO;
+       case IIO_CHAN_INFO_INT_TIME:
+               return IIO_VAL_INT_PLUS_MICRO;
+       default:
+               return -EINVAL;
+       }
+}
+
 static int bu27008_write_raw(struct iio_dev *idev,
                             struct iio_chan_spec const *chan,
                             int val, int val2, long mask)
@@ -756,6 +771,7 @@ static int bu27008_update_scan_mode(struct iio_dev *idev,
 static const struct iio_info bu27008_info = {
        .read_raw = &bu27008_read_raw,
        .write_raw = &bu27008_write_raw,
+       .write_raw_get_fmt = &bu27008_write_raw_get_fmt,
        .read_avail = &bu27008_read_avail,
        .update_scan_mode = bu27008_update_scan_mode,
        .validate_trigger = iio_validate_own_trigger,
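
Both ROHM light-sensor patches hinge on .write_raw_get_fmt(): it tells the IIO core how to split a decimal string written from sysfs into the val/val2 pair passed to write_raw(). Declaring IIO_VAL_INT_PLUS_NANO means val2 already arrives in nano-units, which is why the val2 * 1000 rescaling disappears from the scale paths. A worked example of the two encodings (my_write_raw_get_fmt is illustrative):

/* Userspace writes "0.5" to a scale attribute:
 *   IIO_VAL_INT_PLUS_MICRO -> val = 0, val2 = 500000
 *   IIO_VAL_INT_PLUS_NANO  -> val = 0, val2 = 500000000
 * A driver whose gain tables are nano-scaled should therefore declare
 * the NANO format instead of multiplying val2 by 1000 itself. */
static int my_write_raw_get_fmt(struct iio_dev *indio_dev,
                                struct iio_chan_spec const *chan, long mask)
{
        switch (mask) {
        case IIO_CHAN_INFO_SCALE:
                return IIO_VAL_INT_PLUS_NANO;   /* val2 in nano-units */
        case IIO_CHAN_INFO_INT_TIME:
                return IIO_VAL_INT_PLUS_MICRO;  /* val2 in micro-units */
        default:
                return -EINVAL;
        }
}
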
index e63ef57..bf3de85 100644 (file)
@@ -575,7 +575,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
                return -EINVAL;
 
        if (chan == BU27034_CHAN_ALS) {
-               if (val == 0 && val2 == 1000)
+               if (val == 0 && val2 == 1000000)
                        return 0;
 
                return -EINVAL;
@@ -587,7 +587,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
                goto unlock_out;
 
        ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel,
-                                               val, val2 * 1000, &gain_sel);
+                                               val, val2, &gain_sel);
        if (ret) {
                /*
                 * Could not support scale with given time. Need to change time.
@@ -624,7 +624,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
 
                        /* Can we provide requested scale with this time? */
                        ret = iio_gts_find_gain_sel_for_scale_using_time(
-                               &data->gts, new_time_sel, val, val2 * 1000,
+                               &data->gts, new_time_sel, val, val2,
                                &gain_sel);
                        if (ret)
                                continue;
@@ -1217,6 +1217,21 @@ static int bu27034_read_raw(struct iio_dev *idev,
        }
 }
 
+static int bu27034_write_raw_get_fmt(struct iio_dev *indio_dev,
+                                    struct iio_chan_spec const *chan,
+                                    long mask)
+{
+
+       switch (mask) {
+       case IIO_CHAN_INFO_SCALE:
+               return IIO_VAL_INT_PLUS_NANO;
+       case IIO_CHAN_INFO_INT_TIME:
+               return IIO_VAL_INT_PLUS_MICRO;
+       default:
+               return -EINVAL;
+       }
+}
+
 static int bu27034_write_raw(struct iio_dev *idev,
                             struct iio_chan_spec const *chan,
                             int val, int val2, long mask)
@@ -1267,6 +1282,7 @@ static int bu27034_read_avail(struct iio_dev *idev,
 static const struct iio_info bu27034_info = {
        .read_raw = &bu27034_read_raw,
        .write_raw = &bu27034_write_raw,
+       .write_raw_get_fmt = &bu27034_write_raw_get_fmt,
        .read_avail = &bu27034_read_avail,
 };
 
index 1ee87c3..9891c7d 100644 (file)
@@ -4062,6 +4062,8 @@ static int resolve_prepare_src(struct rdma_id_private *id_priv,
                                           RDMA_CM_ADDR_QUERY)))
                        return -EINVAL;
 
+       } else {
+               memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
        }
 
        if (cma_family(id_priv) != dst_addr->sa_family) {
index 755a9c5..f9ab671 100644 (file)
@@ -85,6 +85,8 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
        dma_addr_t mask;
        int i;
 
+       umem->iova = va = virt;
+
        if (umem->is_odp) {
                unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
 
@@ -100,7 +102,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
         */
        pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
 
-       umem->iova = va = virt;
        /* The best result is the smallest page size that results in the minimum
         * number of required pages. Compute the largest page size that could
         * work based on VA address bits that don't change.
index abef0b8..03cc45a 100644 (file)
@@ -869,7 +869,10 @@ fail:
 int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
 {
        struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+       struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
        struct bnxt_re_dev *rdev = qp->rdev;
+       struct bnxt_qplib_nq *scq_nq = NULL;
+       struct bnxt_qplib_nq *rcq_nq = NULL;
        unsigned int flags;
        int rc;
 
@@ -903,6 +906,15 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
        ib_umem_release(qp->rumem);
        ib_umem_release(qp->sumem);
 
+       /* Flush all entries of the notification queues associated with
+        * the given QP.
+        */
+       scq_nq = qplib_qp->scq->nq;
+       rcq_nq = qplib_qp->rcq->nq;
+       bnxt_re_synchronize_nq(scq_nq);
+       if (scq_nq != rcq_nq)
+               bnxt_re_synchronize_nq(rcq_nq);
+
        return 0;
 }
 
index b42166f..63e98e2 100644 (file)
@@ -1253,6 +1253,8 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
 
        rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
        if (rc) {
+               bnxt_unregister_dev(rdev->en_dev);
+               clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
                ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
                return -EINVAL;
        }
@@ -1526,8 +1528,8 @@ static void bnxt_re_remove(struct auxiliary_device *adev)
        }
        bnxt_re_setup_cc(rdev, false);
        ib_unregister_device(&rdev->ibdev);
-       ib_dealloc_device(&rdev->ibdev);
        bnxt_re_dev_uninit(rdev);
+       ib_dealloc_device(&rdev->ibdev);
 skip_remove:
        mutex_unlock(&bnxt_re_mutex);
 }
index 91aed77..a425556 100644 (file)
@@ -381,6 +381,24 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
        spin_unlock_bh(&hwq->lock);
 }
 
+/* bnxt_re_synchronize_nq - self-poll a notification queue.
+ * @nq      -     notification queue pointer
+ *
+ * This function polls a given notification queue for all pending entries.
+ * It is useful for synchronizing notification entries while resources
+ * are going away.
+ */
+
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq)
+{
+       int budget = nq->budget;
+
+       nq->budget = nq->hwq.max_elements;
+       bnxt_qplib_service_nq(&nq->nq_tasklet);
+       nq->budget = budget;
+}
+
 static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
 {
        struct bnxt_qplib_nq *nq = dev_instance;
@@ -402,19 +420,19 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
        if (!nq->requested)
                return;
 
-       tasklet_disable(&nq->nq_tasklet);
+       nq->requested = false;
        /* Mask h/w interrupt */
        bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
        /* Sync with last running IRQ handler */
        synchronize_irq(nq->msix_vec);
-       if (kill)
-               tasklet_kill(&nq->nq_tasklet);
-
        irq_set_affinity_hint(nq->msix_vec, NULL);
        free_irq(nq->msix_vec, nq);
        kfree(nq->name);
        nq->name = NULL;
-       nq->requested = false;
+
+       if (kill)
+               tasklet_kill(&nq->nq_tasklet);
+       tasklet_disable(&nq->nq_tasklet);
 }
 
 void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
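
The reordered teardown in bnxt_qplib_nq_stop_irq() (and in its rcfw twin further down) follows the safe sequence for retiring an IRQ whose handler schedules a tasklet: refuse new work, mask and quiesce the interrupt, free it, and only then kill the tasklet, so nothing can re-raise the tasklet after tasklet_kill() returns. In outline (mask_hw_interrupt() stands in for the device-specific doorbell write):

nq->requested = false;                  /* 1. refuse new work */
mask_hw_interrupt(nq);                  /* 2. mask at the device */
synchronize_irq(nq->msix_vec);          /* 3. wait out a running handler */
free_irq(nq->msix_vec, nq);             /* 4. no further IRQs can fire */
if (kill)
        tasklet_kill(&nq->nq_tasklet);  /* 5. nothing can reschedule it now */
tasklet_disable(&nq->nq_tasklet);
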
index a428208..404b851 100644 (file)
@@ -553,6 +553,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
                                  struct bnxt_qplib_cqe *cqe,
                                  int num_cqes);
 void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq);
 
 static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
 {
index b30e66b..bc3aea4 100644 (file)
@@ -989,19 +989,18 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
        if (!creq->requested)
                return;
 
-       tasklet_disable(&creq->creq_tasklet);
+       creq->requested = false;
        /* Mask h/w interrupts */
        bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
        /* Sync with last running IRQ-handler */
        synchronize_irq(creq->msix_vec);
-       if (kill)
-               tasklet_kill(&creq->creq_tasklet);
-
        free_irq(creq->msix_vec, rcfw);
        kfree(creq->irq_name);
        creq->irq_name = NULL;
-       creq->requested = false;
        atomic_set(&rcfw->rcfw_intr_enabled, 0);
+       if (kill)
+               tasklet_kill(&creq->creq_tasklet);
+       tasklet_disable(&creq->creq_tasklet);
 }
 
 void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
index 5fd8f7c..739d942 100644 (file)
@@ -819,6 +819,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
        }
 
        memset((u8 *)dpit->tbl, 0xFF, bytes);
+       mutex_init(&res->dpi_tbl_lock);
        dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset;
 
        return 0;
index 9dbb89e..baaa440 100644 (file)
@@ -12307,6 +12307,7 @@ static void free_cntrs(struct hfi1_devdata *dd)
 
        if (dd->synth_stats_timer.function)
                del_timer_sync(&dd->synth_stats_timer);
+       cancel_work_sync(&dd->update_cntr_work);
        ppd = (struct hfi1_pportdata *)(dd + 1);
        for (i = 0; i < dd->num_pports; i++, ppd++) {
                kfree(ppd->cntrs);
index d88c918..45e3344 100644 (file)
@@ -2712,13 +2712,13 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
  */
 void irdma_check_cqp_progress(struct irdma_cqp_timeout *timeout, struct irdma_sc_dev *dev)
 {
-       if (timeout->compl_cqp_cmds != dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]) {
-               timeout->compl_cqp_cmds = dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
+       u64 completed_ops = atomic64_read(&dev->cqp->completed_ops);
+
+       if (timeout->compl_cqp_cmds != completed_ops) {
+               timeout->compl_cqp_cmds = completed_ops;
                timeout->count = 0;
-       } else {
-               if (dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] !=
-                   timeout->compl_cqp_cmds)
-                       timeout->count++;
+       } else if (timeout->compl_cqp_cmds != dev->cqp->requested_ops) {
+               timeout->count++;
        }
 }
 
@@ -2761,7 +2761,7 @@ static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail,
                if (newtail != tail) {
                        /* SUCCESS */
                        IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
-                       cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++;
+                       atomic64_inc(&cqp->completed_ops);
                        return 0;
                }
                udelay(cqp->dev->hw_attrs.max_sleep_count);
@@ -3121,8 +3121,8 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
        info->dev->cqp = cqp;
 
        IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
-       cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] = 0;
-       cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS] = 0;
+       cqp->requested_ops = 0;
+       atomic64_set(&cqp->completed_ops, 0);
        /* for the cqp commands backlog. */
        INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
 
@@ -3274,7 +3274,7 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch
        if (ret_code)
                return NULL;
 
-       cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS]++;
+       cqp->requested_ops++;
        if (!*wqe_idx)
                cqp->polarity = !cqp->polarity;
        wqe = cqp->sq_base[*wqe_idx].elem;
@@ -3363,6 +3363,9 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
        if (polarity != ccq->cq_uk.polarity)
                return -ENOENT;
 
+       /* Ensure CEQE contents are read after valid bit is checked */
+       dma_rmb();
+
        get_64bit_val(cqe, 8, &qp_ctx);
        cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx;
        info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, temp);
@@ -3397,7 +3400,7 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
        dma_wmb(); /* make sure shadow area is updated before moving tail */
 
        IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
-       ccq->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++;
+       atomic64_inc(&cqp->completed_ops);
 
        return ret_code;
 }
@@ -4009,13 +4012,17 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
        u8 polarity;
 
        aeqe = IRDMA_GET_CURRENT_AEQ_ELEM(aeq);
-       get_64bit_val(aeqe, 0, &compl_ctx);
        get_64bit_val(aeqe, 8, &temp);
        polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp);
 
        if (aeq->polarity != polarity)
                return -ENOENT;
 
+       /* Ensure AEQE contents are read after valid bit is checked */
+       dma_rmb();
+
+       get_64bit_val(aeqe, 0, &compl_ctx);
+
        print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
                             aeqe, 16, false);
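
The dma_rmb() insertions in this and the later irdma hunks all fix the same completion-ring race: the device writes a descriptor's payload and then flips its valid/polarity bit, so the CPU must order its payload loads after the valid-bit check or it may consume stale contents on weakly ordered machines. A sketch of the read side (VALID_BIT and expected_polarity are illustrative; get_64bit_val() is the driver's own accessor):

get_64bit_val(cqe, 24, &qword);                 /* word holding the bit */
if (FIELD_GET(VALID_BIT, qword) != expected_polarity)
        return -ENOENT;                         /* not owned by software yet */

dma_rmb();      /* payload loads must not be hoisted above the check */

get_64bit_val(cqe, 0, &payload);                /* now safe to consume */
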
 
index 6014b9d..d06e45d 100644 (file)
@@ -191,32 +191,30 @@ enum irdma_cqp_op_type {
        IRDMA_OP_MANAGE_VF_PBLE_BP              = 25,
        IRDMA_OP_QUERY_FPM_VAL                  = 26,
        IRDMA_OP_COMMIT_FPM_VAL                 = 27,
-       IRDMA_OP_REQ_CMDS                       = 28,
-       IRDMA_OP_CMPL_CMDS                      = 29,
-       IRDMA_OP_AH_CREATE                      = 30,
-       IRDMA_OP_AH_MODIFY                      = 31,
-       IRDMA_OP_AH_DESTROY                     = 32,
-       IRDMA_OP_MC_CREATE                      = 33,
-       IRDMA_OP_MC_DESTROY                     = 34,
-       IRDMA_OP_MC_MODIFY                      = 35,
-       IRDMA_OP_STATS_ALLOCATE                 = 36,
-       IRDMA_OP_STATS_FREE                     = 37,
-       IRDMA_OP_STATS_GATHER                   = 38,
-       IRDMA_OP_WS_ADD_NODE                    = 39,
-       IRDMA_OP_WS_MODIFY_NODE                 = 40,
-       IRDMA_OP_WS_DELETE_NODE                 = 41,
-       IRDMA_OP_WS_FAILOVER_START              = 42,
-       IRDMA_OP_WS_FAILOVER_COMPLETE           = 43,
-       IRDMA_OP_SET_UP_MAP                     = 44,
-       IRDMA_OP_GEN_AE                         = 45,
-       IRDMA_OP_QUERY_RDMA_FEATURES            = 46,
-       IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY          = 47,
-       IRDMA_OP_ADD_LOCAL_MAC_ENTRY            = 48,
-       IRDMA_OP_DELETE_LOCAL_MAC_ENTRY         = 49,
-       IRDMA_OP_CQ_MODIFY                      = 50,
+       IRDMA_OP_AH_CREATE                      = 28,
+       IRDMA_OP_AH_MODIFY                      = 29,
+       IRDMA_OP_AH_DESTROY                     = 30,
+       IRDMA_OP_MC_CREATE                      = 31,
+       IRDMA_OP_MC_DESTROY                     = 32,
+       IRDMA_OP_MC_MODIFY                      = 33,
+       IRDMA_OP_STATS_ALLOCATE                 = 34,
+       IRDMA_OP_STATS_FREE                     = 35,
+       IRDMA_OP_STATS_GATHER                   = 36,
+       IRDMA_OP_WS_ADD_NODE                    = 37,
+       IRDMA_OP_WS_MODIFY_NODE                 = 38,
+       IRDMA_OP_WS_DELETE_NODE                 = 39,
+       IRDMA_OP_WS_FAILOVER_START              = 40,
+       IRDMA_OP_WS_FAILOVER_COMPLETE           = 41,
+       IRDMA_OP_SET_UP_MAP                     = 42,
+       IRDMA_OP_GEN_AE                         = 43,
+       IRDMA_OP_QUERY_RDMA_FEATURES            = 44,
+       IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY          = 45,
+       IRDMA_OP_ADD_LOCAL_MAC_ENTRY            = 46,
+       IRDMA_OP_DELETE_LOCAL_MAC_ENTRY         = 47,
+       IRDMA_OP_CQ_MODIFY                      = 48,
 
        /* Must be last entry*/
-       IRDMA_MAX_CQP_OPS                       = 51,
+       IRDMA_MAX_CQP_OPS                       = 49,
 };
 
 /* CQP SQ WQES */
index 795f7fd..457368e 100644 (file)
@@ -191,6 +191,7 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
        case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
        case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
        case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+       case IRDMA_AE_AMP_MWBIND_VALID_STAG:
                qp->flush_code = FLUSH_MW_BIND_ERR;
                qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
                break;
@@ -2075,7 +2076,7 @@ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
                        cqp_request->compl_info.error = info.error;
 
                        if (cqp_request->waiting) {
-                               cqp_request->request_done = true;
+                               WRITE_ONCE(cqp_request->request_done, true);
                                wake_up(&cqp_request->waitq);
                                irdma_put_cqp_request(&rf->cqp, cqp_request);
                        } else {
index def6dd5..2323962 100644 (file)
@@ -161,8 +161,8 @@ struct irdma_cqp_request {
        void (*callback_fcn)(struct irdma_cqp_request *cqp_request);
        void *param;
        struct irdma_cqp_compl_info compl_info;
+       bool request_done; /* READ/WRITE_ONCE macros operate on it */
        bool waiting:1;
-       bool request_done:1;
        bool dynamic:1;
 };
 
index 4ec9639..5625317 100644 (file)
@@ -230,6 +230,9 @@ static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
        if (valid_bit != cq_uk->polarity)
                return -ENOENT;
 
+       /* Ensure CQE contents are read after valid bit is checked */
+       dma_rmb();
+
        if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
                ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
 
@@ -243,6 +246,9 @@ static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
                if (polarity != cq_uk->polarity)
                        return -ENOENT;
 
+               /* Ensure ext CQE contents are read after ext valid bit is checked */
+               dma_rmb();
+
                IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
                if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
                        cq_uk->polarity = !cq_uk->polarity;
index 5ee6860..a207095 100644 (file)
@@ -365,6 +365,8 @@ struct irdma_sc_cqp {
        struct irdma_dcqcn_cc_params dcqcn_params;
        __le64 *host_ctx;
        u64 *scratch_array;
+       u64 requested_ops;
+       atomic64_t completed_ops;
        u32 cqp_id;
        u32 sq_size;
        u32 hw_sq_size;
index dd428d9..280d633 100644 (file)
@@ -1161,7 +1161,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
        }
        wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
        info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
-       info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+       info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
 
        if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
                u32 array_idx;
@@ -1527,6 +1527,9 @@ void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq)
                if (polarity != temp)
                        break;
 
+               /* Ensure CQE contents are read after valid bit is checked */
+               dma_rmb();
+
                get_64bit_val(cqe, 8, &comp_ctx);
                if ((void *)(unsigned long)comp_ctx == q)
                        set_64bit_val(cqe, 8, 0);
index 71e1c5d..eb083f7 100644 (file)
@@ -481,7 +481,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp,
        if (cqp_request->dynamic) {
                kfree(cqp_request);
        } else {
-               cqp_request->request_done = false;
+               WRITE_ONCE(cqp_request->request_done, false);
                cqp_request->callback_fcn = NULL;
                cqp_request->waiting = false;
 
@@ -515,7 +515,7 @@ irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
 {
        if (cqp_request->waiting) {
                cqp_request->compl_info.error = true;
-               cqp_request->request_done = true;
+               WRITE_ONCE(cqp_request->request_done, true);
                wake_up(&cqp_request->waitq);
        }
        wait_event_timeout(cqp->remove_wq,
@@ -567,11 +567,11 @@ static int irdma_wait_event(struct irdma_pci_f *rf,
        bool cqp_error = false;
        int err_code = 0;
 
-       cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
+       cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops);
        do {
                irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
                if (wait_event_timeout(cqp_request->waitq,
-                                      cqp_request->request_done,
+                                      READ_ONCE(cqp_request->request_done),
                                       msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
                        break;
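
The request_done conversion works hand in hand with the new completed_ops counter: the flag is shared between the CQP completion handler and a sleeping waiter, and as a one-bit bitfield it could not be updated without a read-modify-write of its neighbours, nor accessed by the READ_ONCE()/WRITE_ONCE() macros at all. Promoting it to a plain bool with marked accesses gives both sides single-copy-atomic loads and stores, with the wake/wait pair providing the ordering. In miniature (handle_timeout() is a hypothetical error path):

/* producer, e.g. the completion handler: */
WRITE_ONCE(req->request_done, true);
wake_up(&req->waitq);

/* consumer: */
if (!wait_event_timeout(req->waitq, READ_ONCE(req->request_done),
                        msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
        handle_timeout(req);
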
 
index 4566566..9d08aa9 100644 (file)
@@ -565,15 +565,15 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
                return (-EOPNOTSUPP);
        }
 
-       if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4      |
-                                         MLX4_IB_RX_HASH_DST_IPV4      |
-                                         MLX4_IB_RX_HASH_SRC_IPV6      |
-                                         MLX4_IB_RX_HASH_DST_IPV6      |
-                                         MLX4_IB_RX_HASH_SRC_PORT_TCP  |
-                                         MLX4_IB_RX_HASH_DST_PORT_TCP  |
-                                         MLX4_IB_RX_HASH_SRC_PORT_UDP  |
-                                         MLX4_IB_RX_HASH_DST_PORT_UDP  |
-                                         MLX4_IB_RX_HASH_INNER)) {
+       if (ucmd->rx_hash_fields_mask & ~(u64)(MLX4_IB_RX_HASH_SRC_IPV4 |
+                                              MLX4_IB_RX_HASH_DST_IPV4 |
+                                              MLX4_IB_RX_HASH_SRC_IPV6 |
+                                              MLX4_IB_RX_HASH_DST_IPV6 |
+                                              MLX4_IB_RX_HASH_SRC_PORT_TCP |
+                                              MLX4_IB_RX_HASH_DST_PORT_TCP |
+                                              MLX4_IB_RX_HASH_SRC_PORT_UDP |
+                                              MLX4_IB_RX_HASH_DST_PORT_UDP |
+                                              MLX4_IB_RX_HASH_INNER)) {
                pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
                         ucmd->rx_hash_fields_mask);
                return (-EOPNOTSUPP);
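
The mlx4 cast fixes an integer-width trap: the hash-flag constants are 32-bit, so the complement of their OR is computed at 32-bit width, and after implicit widening its upper 32 bits are zero, meaning bits 32..63 of the user-supplied u64 mask were never rejected. Negating after a u64 cast extends the check to the full width. Reduced to a sketch:

enum { F_A = 1 << 0, F_B = 1 << 1 };    /* int-width flag constants */

static bool mask_is_valid(u64 mask)
{
        /* ~(F_A | F_B) is a 32-bit complement: widened to u64 its top
         * half is zero, so high bits of mask would pass unchecked.
         * Casting first makes the complement cover all 64 bits. */
        return !(mask & ~(u64)(F_A | F_B));
}
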
index 69bba0e..53f4364 100644 (file)
@@ -1393,7 +1393,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
        if (mthca_array_get(&dev->qp_table.qp, mqpn))
                err = -EBUSY;
        else
-               mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
+               mthca_array_set(&dev->qp_table.qp, mqpn, qp);
        spin_unlock_irq(&dev->qp_table.lock);
 
        if (err)
index a973905..ed7d4b0 100644 (file)
@@ -64,9 +64,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
        inode->i_uid = GLOBAL_ROOT_UID;
        inode->i_gid = GLOBAL_ROOT_GID;
        inode->i_blocks = 0;
-       inode->i_atime = current_time(inode);
+       inode->i_atime = inode_set_ctime_current(inode);
        inode->i_mtime = inode->i_atime;
-       inode->i_ctime = inode->i_atime;
        inode->i_private = data;
        if (S_ISDIR(mode)) {
                inode->i_op = &simple_dir_inode_operations;
index d8a43d8..d9312b5 100644 (file)
@@ -199,7 +199,8 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 
        if (access & ~RXE_ACCESS_SUPPORTED_MW) {
                rxe_err_mw(mw, "access %#x not supported", access);
-               return -EOPNOTSUPP;
+               ret = -EOPNOTSUPP;
+               goto err_drop_mr;
        }
 
        spin_lock_bh(&mw->lock);
index 8f385f9..d5f2a6b 100644 (file)
@@ -83,6 +83,11 @@ static void bcm_aggregate(struct qcom_icc_bcm *bcm)
 
                temp = agg_peak[bucket] * bcm->vote_scale;
                bcm->vote_y[bucket] = bcm_div(temp, bcm->aux_data.unit);
+
+               if (bcm->enable_mask && (bcm->vote_x[bucket] || bcm->vote_y[bucket])) {
+                       bcm->vote_x[bucket] = 0;
+                       bcm->vote_y[bucket] = bcm->enable_mask;
+               }
        }
 
        if (bcm->keepalive && bcm->vote_x[QCOM_ICC_BUCKET_AMC] == 0 &&
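
bcm_aggregate() now special-cases mask-based BCMs: for nodes such as ACV the RPMh hardware expects a fixed enable mask as the vote rather than an aggregated bandwidth, so any nonzero aggregate collapses to vote_y = enable_mask with vote_x cleared. Declaring one is just a matter of setting the new field, as the platform hunks below do (bcm_example is illustrative; the mask value comes from the platform tables):

static struct qcom_icc_bcm bcm_example = {
        .name = "ACV",
        .enable_mask = 0x8,     /* fixed on/off vote, not bandwidth */
        .num_nodes = 1,
        .nodes = { &ebi },
};
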
index 04391c1..7843d88 100644 (file)
@@ -81,6 +81,7 @@ struct qcom_icc_node {
  * @vote_x: aggregated threshold values, represents sum_bw when @type is bw bcm
  * @vote_y: aggregated threshold values, represents peak_bw when @type is bw bcm
  * @vote_scale: scaling factor for vote_x and vote_y
+ * @enable_mask: optional mask to send as vote instead of vote_x/vote_y
  * @dirty: flag used to indicate whether the bcm needs to be committed
  * @keepalive: flag used to indicate whether a keepalive is required
  * @aux_data: auxiliary data used when calculating threshold values and
@@ -97,6 +98,7 @@ struct qcom_icc_bcm {
        u64 vote_x[QCOM_ICC_NUM_BUCKETS];
        u64 vote_y[QCOM_ICC_NUM_BUCKETS];
        u64 vote_scale;
+       u32 enable_mask;
        bool dirty;
        bool keepalive;
        struct bcm_db aux_data;
index da21cc3..f565386 100644 (file)
@@ -1873,6 +1873,7 @@ static struct qcom_icc_node srvc_snoc = {
 
 static struct qcom_icc_bcm bcm_acv = {
        .name = "ACV",
+       .enable_mask = 0x8,
        .num_nodes = 1,
        .nodes = { &ebi },
 };
index 2d7a8e7..e64c214 100644 (file)
@@ -1337,6 +1337,7 @@ static struct qcom_icc_node qns_mem_noc_sf_disp = {
 
 static struct qcom_icc_bcm bcm_acv = {
        .name = "ACV",
+       .enable_mask = 0x8,
        .num_nodes = 1,
        .nodes = { &ebi },
 };
@@ -1349,6 +1350,7 @@ static struct qcom_icc_bcm bcm_ce0 = {
 
 static struct qcom_icc_bcm bcm_cn0 = {
        .name = "CN0",
+       .enable_mask = 0x1,
        .keepalive = true,
        .num_nodes = 55,
        .nodes = { &qnm_gemnoc_cnoc, &qnm_gemnoc_pcie,
@@ -1383,6 +1385,7 @@ static struct qcom_icc_bcm bcm_cn0 = {
 
 static struct qcom_icc_bcm bcm_co0 = {
        .name = "CO0",
+       .enable_mask = 0x1,
        .num_nodes = 2,
        .nodes = { &qxm_nsp, &qns_nsp_gemnoc },
 };
@@ -1403,6 +1406,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
 
 static struct qcom_icc_bcm bcm_mm1 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 12,
        .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
                   &qnm_camnoc_sf, &qnm_mdp,
@@ -1445,6 +1449,7 @@ static struct qcom_icc_bcm bcm_sh0 = {
 
 static struct qcom_icc_bcm bcm_sh1 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 7,
        .nodes = { &alm_gpu_tcu, &alm_sys_tcu,
                   &qnm_nsp_gemnoc, &qnm_pcie,
@@ -1461,6 +1466,7 @@ static struct qcom_icc_bcm bcm_sn0 = {
 
 static struct qcom_icc_bcm bcm_sn1 = {
        .name = "SN1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qhm_gic, &qxm_pimem,
                   &xm_gic, &qns_gemnoc_gc },
@@ -1492,6 +1498,7 @@ static struct qcom_icc_bcm bcm_sn7 = {
 
 static struct qcom_icc_bcm bcm_acv_disp = {
        .name = "ACV",
+       .enable_mask = 0x1,
        .num_nodes = 1,
        .nodes = { &ebi_disp },
 };
@@ -1510,6 +1517,7 @@ static struct qcom_icc_bcm bcm_mm0_disp = {
 
 static struct qcom_icc_bcm bcm_mm1_disp = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mdp_disp, &qnm_rot_disp,
                   &qns_mem_noc_sf_disp },
@@ -1523,6 +1531,7 @@ static struct qcom_icc_bcm bcm_sh0_disp = {
 
 static struct qcom_icc_bcm bcm_sh1_disp = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 1,
        .nodes = { &qnm_pcie_disp },
 };
index d823ba9..0864ed2 100644 (file)
@@ -1473,6 +1473,7 @@ static struct qcom_icc_node qns_mem_noc_sf_cam_ife_2 = {
 
 static struct qcom_icc_bcm bcm_acv = {
        .name = "ACV",
+       .enable_mask = 0x8,
        .num_nodes = 1,
        .nodes = { &ebi },
 };
@@ -1485,6 +1486,7 @@ static struct qcom_icc_bcm bcm_ce0 = {
 
 static struct qcom_icc_bcm bcm_cn0 = {
        .name = "CN0",
+       .enable_mask = 0x1,
        .keepalive = true,
        .num_nodes = 54,
        .nodes = { &qsm_cfg, &qhs_ahb2phy0,
@@ -1524,6 +1526,7 @@ static struct qcom_icc_bcm bcm_cn1 = {
 
 static struct qcom_icc_bcm bcm_co0 = {
        .name = "CO0",
+       .enable_mask = 0x1,
        .num_nodes = 2,
        .nodes = { &qxm_nsp, &qns_nsp_gemnoc },
 };
@@ -1549,6 +1552,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
 
 static struct qcom_icc_bcm bcm_mm1 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 8,
        .nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
                   &qnm_camnoc_sf, &qnm_vapss_hcp,
@@ -1589,6 +1593,7 @@ static struct qcom_icc_bcm bcm_sh0 = {
 
 static struct qcom_icc_bcm bcm_sh1 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 13,
        .nodes = { &alm_gpu_tcu, &alm_sys_tcu,
                   &chm_apps, &qnm_gpu,
@@ -1608,6 +1613,7 @@ static struct qcom_icc_bcm bcm_sn0 = {
 
 static struct qcom_icc_bcm bcm_sn1 = {
        .name = "SN1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qhm_gic, &xm_gic,
                   &qns_gemnoc_gc },
@@ -1633,6 +1639,7 @@ static struct qcom_icc_bcm bcm_sn7 = {
 
 static struct qcom_icc_bcm bcm_acv_disp = {
        .name = "ACV",
+       .enable_mask = 0x1,
        .num_nodes = 1,
        .nodes = { &ebi_disp },
 };
@@ -1657,12 +1664,14 @@ static struct qcom_icc_bcm bcm_sh0_disp = {
 
 static struct qcom_icc_bcm bcm_sh1_disp = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 2,
        .nodes = { &qnm_mnoc_hf_disp, &qnm_pcie_disp },
 };
 
 static struct qcom_icc_bcm bcm_acv_cam_ife_0 = {
        .name = "ACV",
+       .enable_mask = 0x0,
        .num_nodes = 1,
        .nodes = { &ebi_cam_ife_0 },
 };
@@ -1681,6 +1690,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_0 = {
 
 static struct qcom_icc_bcm bcm_mm1_cam_ife_0 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qnm_camnoc_hf_cam_ife_0, &qnm_camnoc_icp_cam_ife_0,
                   &qnm_camnoc_sf_cam_ife_0, &qns_mem_noc_sf_cam_ife_0 },
@@ -1694,6 +1704,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_0 = {
 
 static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mnoc_hf_cam_ife_0, &qnm_mnoc_sf_cam_ife_0,
                   &qnm_pcie_cam_ife_0 },
@@ -1701,6 +1712,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = {
 
 static struct qcom_icc_bcm bcm_acv_cam_ife_1 = {
        .name = "ACV",
+       .enable_mask = 0x0,
        .num_nodes = 1,
        .nodes = { &ebi_cam_ife_1 },
 };
@@ -1719,6 +1731,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_1 = {
 
 static struct qcom_icc_bcm bcm_mm1_cam_ife_1 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qnm_camnoc_hf_cam_ife_1, &qnm_camnoc_icp_cam_ife_1,
                   &qnm_camnoc_sf_cam_ife_1, &qns_mem_noc_sf_cam_ife_1 },
@@ -1732,6 +1745,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_1 = {
 
 static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mnoc_hf_cam_ife_1, &qnm_mnoc_sf_cam_ife_1,
                   &qnm_pcie_cam_ife_1 },
@@ -1739,6 +1753,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = {
 
 static struct qcom_icc_bcm bcm_acv_cam_ife_2 = {
        .name = "ACV",
+       .enable_mask = 0x0,
        .num_nodes = 1,
        .nodes = { &ebi_cam_ife_2 },
 };
@@ -1757,6 +1772,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_2 = {
 
 static struct qcom_icc_bcm bcm_mm1_cam_ife_2 = {
        .name = "MM1",
+       .enable_mask = 0x1,
        .num_nodes = 4,
        .nodes = { &qnm_camnoc_hf_cam_ife_2, &qnm_camnoc_icp_cam_ife_2,
                   &qnm_camnoc_sf_cam_ife_2, &qns_mem_noc_sf_cam_ife_2 },
@@ -1770,6 +1786,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_2 = {
 
 static struct qcom_icc_bcm bcm_sh1_cam_ife_2 = {
        .name = "SH1",
+       .enable_mask = 0x1,
        .num_nodes = 3,
        .nodes = { &qnm_mnoc_hf_cam_ife_2, &qnm_mnoc_sf_cam_ife_2,
                   &qnm_pcie_cam_ife_2 },
index 3ebd4b6..05c0fb2 100644 (file)
@@ -34,8 +34,9 @@ static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t ma
        }
 
        ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL);
-       if (ret < min)
+       if (ret < 0)
                goto out;
+
        mm->pasid = ret;
        ret = 0;
 out:
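
The one-character-looking SVA fix above is a signedness bug: min is an unsigned ioasid_t, so in "ret < min" the negative errno from ida_alloc_range() was converted to a huge unsigned value and the error path never ran. Errno-returning allocators must be tested with "ret < 0" before the value is used as an ID; a minimal sketch:

ret = ida_alloc_range(&ida, min, max, GFP_KERNEL);
if (ret < 0)            /* -ENOMEM / -ENOSPC; never compare against an
                         * unsigned bound, or the test is defeated */
        return ret;
id = ret;               /* success: guaranteed to lie in [min, max] */
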
index da340f1..caaf563 100644 (file)
@@ -2891,14 +2891,11 @@ static int iommu_setup_default_domain(struct iommu_group *group,
                ret = __iommu_group_set_domain_internal(
                        group, dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
                if (WARN_ON(ret))
-                       goto out_free;
+                       goto out_free_old;
        } else {
                ret = __iommu_group_set_domain(group, dom);
-               if (ret) {
-                       iommu_domain_free(dom);
-                       group->default_domain = old_dom;
-                       return ret;
-               }
+               if (ret)
+                       goto err_restore_def_domain;
        }
 
        /*
@@ -2911,20 +2908,24 @@ static int iommu_setup_default_domain(struct iommu_group *group,
                for_each_group_device(group, gdev) {
                        ret = iommu_create_device_direct_mappings(dom, gdev->dev);
                        if (ret)
-                               goto err_restore;
+                               goto err_restore_domain;
                }
        }
 
-err_restore:
-       if (old_dom) {
+out_free_old:
+       if (old_dom)
+               iommu_domain_free(old_dom);
+       return ret;
+
+err_restore_domain:
+       if (old_dom)
                __iommu_group_set_domain_internal(
                        group, old_dom, IOMMU_SET_DOMAIN_MUST_SUCCEED);
+err_restore_def_domain:
+       if (old_dom) {
                iommu_domain_free(dom);
-               old_dom = NULL;
+               group->default_domain = old_dom;
        }
-out_free:
-       if (old_dom)
-               iommu_domain_free(old_dom);
        return ret;
 }
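
The iommu_setup_default_domain() rework replaces an error path that could free the wrong domain with the conventional unwind ladder: each failure jumps to a label that undoes exactly the steps completed so far, in reverse order. The shape of the idiom, as a generic sketch (step_*/undo_* are hypothetical helpers):

static int setup(void)
{
        int ret;

        ret = step_one();
        if (ret)
                return ret;

        ret = step_two();
        if (ret)
                goto err_undo_one;

        ret = step_three();
        if (ret)
                goto err_undo_two;

        return 0;

err_undo_two:
        undo_two();
err_undo_one:
        undo_one();
        return ret;
}
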
 
index 29d0566..ed2937a 100644 (file)
@@ -109,10 +109,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
  */
 void iommufd_device_unbind(struct iommufd_device *idev)
 {
-       bool was_destroyed;
-
-       was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj);
-       WARN_ON(!was_destroyed);
+       iommufd_object_destroy_user(idev->ictx, &idev->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
 
@@ -382,7 +379,7 @@ void iommufd_device_detach(struct iommufd_device *idev)
        mutex_unlock(&hwpt->devices_lock);
 
        if (hwpt->auto_domain)
-               iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
+               iommufd_object_deref_user(idev->ictx, &hwpt->obj);
        else
                refcount_dec(&hwpt->obj.users);
 
@@ -456,10 +453,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
  */
 void iommufd_access_destroy(struct iommufd_access *access)
 {
-       bool was_destroyed;
-
-       was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
-       WARN_ON(!was_destroyed);
+       iommufd_object_destroy_user(access->ictx, &access->obj);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
 
index b38e67d..f979098 100644 (file)
@@ -176,8 +176,19 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
                                      struct iommufd_object *obj);
 void iommufd_object_finalize(struct iommufd_ctx *ictx,
                             struct iommufd_object *obj);
-bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-                                struct iommufd_object *obj);
+void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+                                  struct iommufd_object *obj, bool allow_fail);
+static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+                                              struct iommufd_object *obj)
+{
+       __iommufd_object_destroy_user(ictx, obj, false);
+}
+static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
+                                            struct iommufd_object *obj)
+{
+       __iommufd_object_destroy_user(ictx, obj, true);
+}
+
 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
                                             size_t size,
                                             enum iommufd_object_type type);
index 3fbe636..4cf5f73 100644 (file)
@@ -117,13 +117,55 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 }
 
 /*
+ * Remove the given object id from the xarray if the only reference to the
+ * object is held by the xarray. The caller must call ops destroy().
+ */
+static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx,
+                                                   u32 id, bool extra_put)
+{
+       struct iommufd_object *obj;
+       XA_STATE(xas, &ictx->objects, id);
+
+       xa_lock(&ictx->objects);
+       obj = xas_load(&xas);
+       if (xa_is_zero(obj) || !obj) {
+               obj = ERR_PTR(-ENOENT);
+               goto out_xa;
+       }
+
+       /*
+        * If the caller is holding a ref on obj we put it here under the
+        * spinlock.
+        */
+       if (extra_put)
+               refcount_dec(&obj->users);
+
+       if (!refcount_dec_if_one(&obj->users)) {
+               obj = ERR_PTR(-EBUSY);
+               goto out_xa;
+       }
+
+       xas_store(&xas, NULL);
+       if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
+               ictx->vfio_ioas = NULL;
+
+out_xa:
+       xa_unlock(&ictx->objects);
+
+       /* The returned object reference count is zero */
+       return obj;
+}
+
+/*
  * The caller holds a users refcount and wants to destroy the object. In all
  * cases the caller no longer has a reference on obj.
  */
-bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
-                                struct iommufd_object *obj)
+void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+                                  struct iommufd_object *obj, bool allow_fail)
 {
+       struct iommufd_object *ret;
+
        /*
         * The purpose of the destroy_rwsem is to ensure deterministic
         * destruction of objects used by external drivers and destroyed by this
@@ -131,22 +173,22 @@ bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
         * side of this, such as during ioctl execution.
         */
        down_write(&obj->destroy_rwsem);
-       xa_lock(&ictx->objects);
-       refcount_dec(&obj->users);
-       if (!refcount_dec_if_one(&obj->users)) {
-               xa_unlock(&ictx->objects);
-               up_write(&obj->destroy_rwsem);
-               return false;
-       }
-       __xa_erase(&ictx->objects, obj->id);
-       if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj)
-               ictx->vfio_ioas = NULL;
-       xa_unlock(&ictx->objects);
+       ret = iommufd_object_remove(ictx, obj->id, true);
        up_write(&obj->destroy_rwsem);
 
+       if (allow_fail && IS_ERR(ret))
+               return;
+
+       /*
+        * If there is a bug and we couldn't destroy the object then we have
+        * already dropped the caller's refcount; the object will eventually
+        * be freed again when the context is closed.
+        */
+       if (WARN_ON(IS_ERR(ret)))
+               return;
+
        iommufd_object_ops[obj->type].destroy(obj);
        kfree(obj);
-       return true;
 }
 
 static int iommufd_destroy(struct iommufd_ucmd *ucmd)
@@ -154,13 +196,11 @@ static int iommufd_destroy(struct iommufd_ucmd *ucmd)
        struct iommu_destroy *cmd = ucmd->cmd;
        struct iommufd_object *obj;
 
-       obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY);
+       obj = iommufd_object_remove(ucmd->ictx, cmd->id, false);
        if (IS_ERR(obj))
                return PTR_ERR(obj);
-       iommufd_ref_to_users(obj);
-       /* See iommufd_ref_to_users() */
-       if (!iommufd_object_destroy_user(ucmd->ictx, obj))
-               return -EBUSY;
+       iommufd_object_ops[obj->type].destroy(obj);
+       kfree(obj);
        return 0;
 }
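
The destroy paths above hinge on refcount_dec_if_one(): the reference owned by the xarray is dropped only when it is the last one, so an object still in use fails removal with -EBUSY instead of being freed underneath a concurrent user. A minimal sketch of the idiom, using hypothetical my_obj/my_obj_remove names rather than the iommufd types:

#include <linux/err.h>
#include <linux/refcount.h>
#include <linux/xarray.h>

struct my_obj {
        refcount_t users;       /* one reference is owned by the xarray */
};

/* Remove @id only if the xarray holds the last reference. */
static struct my_obj *my_obj_remove(struct xarray *xa, unsigned long id)
{
        struct my_obj *obj;

        xa_lock(xa);
        obj = xa_load(xa, id);
        if (!obj) {
                xa_unlock(xa);
                return ERR_PTR(-ENOENT);
        }
        /* Fails, leaving the count untouched, unless users == 1. */
        if (!refcount_dec_if_one(&obj->users)) {
                xa_unlock(xa);
                return ERR_PTR(-EBUSY);
        }
        __xa_erase(xa, id);
        xa_unlock(xa);
        return obj;             /* count is now zero; the caller frees it */
}
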
 
index 412ca96..8d9aa29 100644 (file)
@@ -297,7 +297,7 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
        batch->pfns[0] = batch->pfns[batch->end - 1] +
                         (batch->npfns[batch->end - 1] - keep_pfns);
        batch->npfns[0] = keep_pfns;
-       batch->end = 0;
+       batch->end = 1;
 }
 
 static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
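
The one-line fix above is a carry off-by-one: batch_clear_carry() copies the tail pfns it wants to keep into slot 0, so the batch still contains one valid entry afterwards and end must say so. A stand-alone illustration of the invariant, using a hypothetical struct batch rather than the real pfn_batch:

struct batch {
        unsigned long pfns[16];
        unsigned int npfns[16];
        unsigned int end;       /* number of valid entries */
};

/* Restart the batch, carrying @keep pfns over from the last entry. */
static void batch_carry(struct batch *b, unsigned int keep)
{
        if (!keep) {
                b->end = 0;
                return;
        }
        b->pfns[0] = b->pfns[b->end - 1] + (b->npfns[b->end - 1] - keep);
        b->npfns[0] = keep;
        b->end = 1;             /* slot 0 is occupied; end = 0 would lose it */
}
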
index fa113cb..9745a11 100644 (file)
@@ -60,7 +60,6 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -82,6 +81,7 @@ struct bcm6345_l1_chip {
 };
 
 struct bcm6345_l1_cpu {
+       struct bcm6345_l1_chip  *intc;
        void __iomem            *map_base;
        unsigned int            parent_irq;
        u32                     enable_cache[];
@@ -115,17 +115,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc,
 
 static void bcm6345_l1_irq_handle(struct irq_desc *desc)
 {
-       struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc);
-       struct bcm6345_l1_cpu *cpu;
+       struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc);
+       struct bcm6345_l1_chip *intc = cpu->intc;
        struct irq_chip *chip = irq_desc_get_chip(desc);
        unsigned int idx;
 
-#ifdef CONFIG_SMP
-       cpu = intc->cpus[cpu_logical_map(smp_processor_id())];
-#else
-       cpu = intc->cpus[0];
-#endif
-
        chained_irq_enter(chip, desc);
 
        for (idx = 0; idx < intc->n_words; idx++) {
@@ -253,6 +247,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
        if (!cpu)
                return -ENOMEM;
 
+       cpu->intc = intc;
        cpu->map_base = ioremap(res.start, sz);
        if (!cpu->map_base)
                return -ENOMEM;
@@ -271,7 +266,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
                return -EINVAL;
        }
        irq_set_chained_handler_and_data(cpu->parent_irq,
-                                               bcm6345_l1_irq_handle, intc);
+                                               bcm6345_l1_irq_handle, cpu);
 
        return 0;
 }
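
The handler-data change is a common chained-IRQ refactor: register the per-CPU structure as the cookie and reach the shared chip through a back-pointer, instead of deriving the per-CPU slice from smp_processor_id() inside the handler. Roughly, with hypothetical my_* types and a stub demux helper:

#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>

struct my_chip;

struct my_cpu {
        struct my_chip *chip;           /* back-pointer to the shared state */
        unsigned int parent_irq;
};

static void handle_sources(struct my_chip *chip, struct my_cpu *cpu) { }

static void my_handle(struct irq_desc *desc)
{
        /* The cookie is now the per-CPU slice, not the shared chip. */
        struct my_cpu *cpu = irq_desc_get_handler_data(desc);
        struct irq_chip *parent = irq_desc_get_chip(desc);

        chained_irq_enter(parent, desc);
        handle_sources(cpu->chip, cpu); /* hypothetical demux */
        chained_irq_exit(parent, desc);
}

static void my_setup(struct my_cpu *cpu, struct my_chip *chip)
{
        cpu->chip = chip;
        irq_set_chained_handler_and_data(cpu->parent_irq, my_handle, cpu);
}
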
index a62b962..24ca1d6 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
index 091b0fe..5559c94 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/io.h>
index 3989d16..a275a80 100644 (file)
@@ -4,7 +4,7 @@
  */
 #include <linux/module.h>
 #include <linux/clk.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/irqchip/arm-gic.h>
 #include <linux/platform_device.h>
index 634263d..8e87fc3 100644 (file)
@@ -9,8 +9,6 @@
 
 #include <linux/acpi.h>
 #include <linux/acpi_iort.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
 #include <linux/of.h>
index 1994541..e0c2b10 100644 (file)
@@ -273,13 +273,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
        raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
 }
 
+static struct irq_chip its_vpe_irq_chip;
+
 static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
 {
-       struct its_vlpi_map *map = get_vlpi_map(d);
+       struct its_vpe *vpe = NULL;
        int cpu;
 
-       if (map) {
-               cpu = vpe_to_cpuid_lock(map->vpe, flags);
+       if (d->chip == &its_vpe_irq_chip) {
+               vpe = irq_data_get_irq_chip_data(d);
+       } else {
+               struct its_vlpi_map *map = get_vlpi_map(d);
+               if (map)
+                       vpe = map->vpe;
+       }
+
+       if (vpe) {
+               cpu = vpe_to_cpuid_lock(vpe, flags);
        } else {
                /* Physical LPIs are already locked via the irq_desc lock */
                struct its_device *its_dev = irq_data_get_irq_chip_data(d);
@@ -293,10 +303,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
 
 static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
 {
-       struct its_vlpi_map *map = get_vlpi_map(d);
+       struct its_vpe *vpe = NULL;
+
+       if (d->chip == &its_vpe_irq_chip) {
+               vpe = irq_data_get_irq_chip_data(d);
+       } else {
+               struct its_vlpi_map *map = get_vlpi_map(d);
+               if (map)
+                       vpe = map->vpe;
+       }
 
-       if (map)
-               vpe_to_cpuid_unlock(map->vpe, flags);
+       if (vpe)
+               vpe_to_cpuid_unlock(vpe, flags);
 }
 
 static struct its_collection *valid_col(struct its_collection *col)
@@ -1433,14 +1451,29 @@ static void wait_for_syncr(void __iomem *rdbase)
                cpu_relax();
 }
 
-static void direct_lpi_inv(struct irq_data *d)
+static void __direct_lpi_inv(struct irq_data *d, u64 val)
 {
-       struct its_vlpi_map *map = get_vlpi_map(d);
        void __iomem *rdbase;
        unsigned long flags;
-       u64 val;
        int cpu;
 
+       /* Target the redistributor this LPI is currently routed to */
+       cpu = irq_to_cpuid_lock(d, &flags);
+       raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+
+       rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+       gic_write_lpir(val, rdbase + GICR_INVLPIR);
+       wait_for_syncr(rdbase);
+
+       raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+       irq_to_cpuid_unlock(d, flags);
+}
+
+static void direct_lpi_inv(struct irq_data *d)
+{
+       struct its_vlpi_map *map = get_vlpi_map(d);
+       u64 val;
+
        if (map) {
                struct its_device *its_dev = irq_data_get_irq_chip_data(d);
 
@@ -1453,15 +1486,7 @@ static void direct_lpi_inv(struct irq_data *d)
                val = d->hwirq;
        }
 
-       /* Target the redistributor this LPI is currently routed to */
-       cpu = irq_to_cpuid_lock(d, &flags);
-       raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
-       rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
-       gic_write_lpir(val, rdbase + GICR_INVLPIR);
-
-       wait_for_syncr(rdbase);
-       raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
-       irq_to_cpuid_unlock(d, flags);
+       __direct_lpi_inv(d, val);
 }
 
 static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
@@ -3953,18 +3978,10 @@ static void its_vpe_send_inv(struct irq_data *d)
 {
        struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
 
-       if (gic_rdists->has_direct_lpi) {
-               void __iomem *rdbase;
-
-               /* Target the redistributor this VPE is currently known on */
-               raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
-               rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
-               gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
-               wait_for_syncr(rdbase);
-               raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
-       } else {
+       if (gic_rdists->has_direct_lpi)
+               __direct_lpi_inv(d, d->parent_data->hwirq);
+       else
                its_vpe_send_cmd(vpe, its_send_inv);
-       }
 }
 
 static void its_vpe_mask_irq(struct irq_data *d)
@@ -4727,7 +4744,8 @@ static bool __maybe_unused its_enable_rk3588001(void *data)
 {
        struct its_node *its = data;
 
-       if (!of_machine_is_compatible("rockchip,rk3588"))
+       if (!of_machine_is_compatible("rockchip,rk3588") &&
+           !of_machine_is_compatible("rockchip,rk3588s"))
                return false;
 
        its->flags |= ITS_FLAGS_FORCE_NON_SHAREABLE;
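
The irq_to_cpuid_lock()/unlock() hunks both lean on the chip pointer acting as a type tag: irq_data_get_irq_chip_data() returns a void * whose real type depends on which irq_chip owns the descriptor, so comparing d->chip against a known chip identifies what chip_data points at. A condensed sketch, assuming two hypothetical chips and payload types:

#include <linux/irq.h>

struct my_vpe { int resident_cpu; };
struct my_dev { struct my_vpe *vpe; };

static struct irq_chip vpe_chip;        /* chip_data is a struct my_vpe * */
static struct irq_chip dev_chip;        /* chip_data is a struct my_dev * */

static struct my_vpe *data_to_vpe(struct irq_data *d)
{
        /* The chip in use tells us what chip_data really points at. */
        if (d->chip == &vpe_chip)
                return irq_data_get_irq_chip_data(d);
        if (d->chip == &dev_chip)
                return ((struct my_dev *)irq_data_get_irq_chip_data(d))->vpe;
        return NULL;
}
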
index 0c6c1af..eedfa8e 100644 (file)
@@ -69,6 +69,8 @@ struct gic_chip_data {
 static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly;
 static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum);
 
+static DEFINE_STATIC_KEY_FALSE(gic_arm64_2941627_erratum);
+
 static struct gic_chip_data gic_data __read_mostly;
 static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
 
@@ -592,10 +594,39 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
        gic_irq_set_prio(d, GICD_INT_DEF_PRI);
 }
 
+static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
+{
+       enum gic_intid_range range;
+
+       if (!static_branch_unlikely(&gic_arm64_2941627_erratum))
+               return false;
+
+       range = get_intid_range(d);
+
+       /*
+        * The workaround is needed if the IRQ is an SPI and
+        * the target cpu is different from the one we are
+        * executing on.
+        */
+       return (range == SPI_RANGE || range == ESPI_RANGE) &&
+               !cpumask_test_cpu(raw_smp_processor_id(),
+                                 irq_data_get_effective_affinity_mask(d));
+}
+
 static void gic_eoi_irq(struct irq_data *d)
 {
        write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
        isb();
+
+       if (gic_arm64_erratum_2941627_needed(d)) {
+               /*
+                * Make sure the GIC stream deactivate packet
+                * issued by ICC_EOIR1_EL1 has completed before
+                * deactivating through GICD_IACTIVER.
+                */
+               dsb(sy);
+               gic_poke_irq(d, GICD_ICACTIVER);
+       }
 }
 
 static void gic_eoimode1_eoi_irq(struct irq_data *d)
@@ -606,7 +637,11 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
         */
        if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
                return;
-       gic_write_dir(gic_irq(d));
+
+       if (!gic_arm64_erratum_2941627_needed(d))
+               gic_write_dir(gic_irq(d));
+       else
+               gic_poke_irq(d, GICD_ICACTIVER);
 }
 
 static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -1816,6 +1851,12 @@ static bool gic_enable_quirk_asr8601(void *data)
        return true;
 }
 
+static bool gic_enable_quirk_arm64_2941627(void *data)
+{
+       static_branch_enable(&gic_arm64_2941627_erratum);
+       return true;
+}
+
 static const struct gic_quirk gic_quirks[] = {
        {
                .desc   = "GICv3: Qualcomm MSM8996 broken firmware",
@@ -1864,6 +1905,25 @@ static const struct gic_quirk gic_quirks[] = {
                .init   = gic_enable_quirk_nvidia_t241,
        },
        {
+               /*
+                * GIC-700: 2941627 workaround - IP variant [0,1]
+                */
+               .desc   = "GICv3: ARM64 erratum 2941627",
+               .iidr   = 0x0400043b,
+               .mask   = 0xff0e0fff,
+               .init   = gic_enable_quirk_arm64_2941627,
+       },
+       {
+               /*
+                * GIC-700: 2941627 workaround - IP variant [2]
+                */
+               .desc   = "GICv3: ARM64 erratum 2941627",
+               .iidr   = 0x0402043b,
+               .mask   = 0xff0f0fff,
+               .init   = gic_enable_quirk_arm64_2941627,
+       },
+       {
        }
 };
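
The erratum plumbing follows the usual static-key recipe: a key that defaults to off, a quirk init hook that flips it once at boot when a matching IIDR is found, and static_branch_unlikely() in the fast path so the workaround costs a patched NOP on unaffected hardware. In outline, with hypothetical my_* names:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(my_erratum_key);

/* Quirk init hook, run once when the broken implementation is matched. */
static bool my_quirk_init(void *data)
{
        static_branch_enable(&my_erratum_key);
        return true;
}

static void my_eoi(void)
{
        /* Patched to a NOP until the key is enabled: no load, no test. */
        if (static_branch_unlikely(&my_erratum_key)) {
                /* ... workaround sequence ... */
        }
}
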
 
index b70ce0d..115bdcf 100644 (file)
@@ -340,7 +340,7 @@ static void i8259_irq_dispatch(struct irq_desc *desc)
        generic_handle_domain_irq(domain, hwirq);
 }
 
-int __init i8259_of_init(struct device_node *node, struct device_node *parent)
+static int __init i8259_of_init(struct device_node *node, struct device_node *parent)
 {
        struct irq_domain *domain;
        unsigned int parent_irq;
index 80aaea8..6d9a082 100644 (file)
@@ -50,8 +50,9 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/pm_runtime.h>
 
index 96230a0..bd95433 100644 (file)
@@ -10,8 +10,9 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/spinlock.h>
 
index 229039e..90d41c1 100644 (file)
@@ -339,8 +339,8 @@ static int __init imx_mu_of_init(struct device_node *dn,
        msi_data->msiir_addr = res->start + msi_data->cfg->xTR;
 
        irq = platform_get_irq(pdev, 0);
-       if (irq <= 0)
-               return -ENODEV;
+       if (irq < 0)
+               return irq;
 
        platform_set_drvdata(pdev, msi_data);
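
This return-value fix reflects the platform_get_irq() contract: it returns either a valid IRQ number (> 0) or a negative errno, never 0, and the errno, which may be -EPROBE_DEFER, should be propagated rather than flattened into -ENODEV. The canonical probe-time shape:

#include <linux/platform_device.h>

static int my_probe(struct platform_device *pdev)
{
        int irq;

        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;     /* keeps -EPROBE_DEFER and friends intact */

        /* ... devm_request_irq(&pdev->dev, irq, ...) ... */
        return 0;
}
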
 
index ba9792e..a36396d 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/irqdomain.h>
 #include <linux/irqchip.h>
 #include <linux/of.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 
index 92d8aa2..1623cd7 100644 (file)
@@ -144,7 +144,7 @@ static int eiointc_router_init(unsigned int cpu)
        int i, bit;
        uint32_t data;
        uint32_t node = cpu_to_eio_node(cpu);
-       uint32_t index = eiointc_index(node);
+       int index = eiointc_index(node);
 
        if (index < 0) {
                pr_err("Error: invalid nodemap!\n");
index fc8bf1f..0bff728 100644 (file)
@@ -15,7 +15,6 @@
 #include <linux/platform_device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/syscore_ops.h>
 
 /* Registers */
index 93a71f6..63db8e2 100644 (file)
@@ -12,9 +12,9 @@
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
 #include <linux/syscore_ops.h>
 
 /* Registers */
index f5ba3f9..f31a262 100644 (file)
@@ -349,8 +349,7 @@ static int ls_scfg_msi_probe(struct platform_device *pdev)
 
        msi_data->cfg = (struct ls_scfg_msi_cfg *) match->data;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       msi_data->regs = devm_ioremap_resource(&pdev->dev, res);
+       msi_data->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
        if (IS_ERR(msi_data->regs)) {
                dev_err(&pdev->dev, "failed to initialize 'regs'\n");
                return PTR_ERR(msi_data->regs);
index 8b81271..3eb1f8c 100644 (file)
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/irqchip/irq-madera.h>
 #include <linux/mfd/madera/core.h>
 #include <linux/mfd/madera/pdata.h>
index 7da18ef..f88df39 100644 (file)
@@ -150,6 +150,10 @@ static const struct meson_gpio_irq_params s4_params = {
        INIT_MESON_S4_COMMON_DATA(82)
 };
 
+static const struct meson_gpio_irq_params c3_params = {
+       INIT_MESON_S4_COMMON_DATA(55)
+};
+
 static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
        { .compatible = "amlogic,meson8-gpio-intc", .data = &meson8_params },
        { .compatible = "amlogic,meson8b-gpio-intc", .data = &meson8b_params },
@@ -160,6 +164,7 @@ static const struct of_device_id meson_irq_gpio_matches[] __maybe_unused = {
        { .compatible = "amlogic,meson-sm1-gpio-intc", .data = &sm1_params },
        { .compatible = "amlogic,meson-a1-gpio-intc", .data = &a1_params },
        { .compatible = "amlogic,meson-s4-gpio-intc", .data = &s4_params },
+       { .compatible = "amlogic,c3-gpio-intc", .data = &c3_params },
        { }
 };
 
index 6d5ecc1..76253e8 100644 (file)
@@ -557,7 +557,7 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
        return gic_irq_domain_map(d, virq, hwirq);
 }
 
-void gic_irq_domain_free(struct irq_domain *d, unsigned int virq,
+static void gic_irq_domain_free(struct irq_domain *d, unsigned int virq,
                         unsigned int nr_irqs)
 {
 }
index 4ecef6d..a48dbe9 100644 (file)
@@ -377,8 +377,7 @@ static int mvebu_sei_probe(struct platform_device *pdev)
        mutex_init(&sei->cp_msi_lock);
        raw_spin_lock_init(&sei->mask_lock);
 
-       sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       sei->base = devm_ioremap_resource(sei->dev, sei->res);
+       sei->base = devm_platform_get_and_ioremap_resource(pdev, 0, &sei->res);
        if (IS_ERR(sei->base))
                return PTR_ERR(sei->base);
 
index 17c2c7a..4e4e874 100644 (file)
@@ -57,8 +57,7 @@ static int __init orion_irq_init(struct device_node *np,
        struct resource r;
 
        /* count number of irq chips by valid reg addresses */
-       while (of_address_to_resource(np, num_chips, &r) == 0)
-               num_chips++;
+       num_chips = of_address_count(np);
 
        orion_irq_domain = irq_domain_add_linear(np,
                                num_chips * ORION_IRQS_PER_CHIP,
index fa8d89b..0f64ecb 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 
 /*
@@ -565,8 +565,8 @@ static int pruss_intc_probe(struct platform_device *pdev)
                        continue;
 
                irq = platform_get_irq_byname(pdev, irq_names[i]);
-               if (irq <= 0) {
-                       ret = (irq == 0) ? -EINVAL : irq;
+               if (irq < 0) {
+                       ret = irq;
                        goto fail_irq;
                }
 
index d306146..7124565 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/mailbox_client.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
 #include <linux/slab.h>
index 26e4c17..fa19585 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
 #include <linux/pm_runtime.h>
 
 #define INTC_IRQPIN_MAX 8 /* maximum 8 interrupts per driver instance */
index 819a122..de71bb3 100644 (file)
@@ -10,7 +10,7 @@
 #include <dt-bindings/interrupt-controller/irq-st.h>
 #include <linux/err.h>
 #include <linux/mfd/syscon.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
 #include <linux/slab.h>
index b5fa76c..d8ba5fb 100644 (file)
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/syscore_ops.h>
 
 #include <dt-bindings/interrupt-controller/arm-gic.h>
index 21d4979..e760b12 100644 (file)
@@ -19,7 +19,6 @@
 #include <linux/irqdomain.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/irqchip.h>
 #include <linux/irqchip/chained_irq.h>
 
index 8a0e692..6805863 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/irqchip.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
-#include <linux/of_platform.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 #include <linux/bitops.h>
index 7133f9f..b83f5cb 100644 (file)
@@ -15,9 +15,9 @@
 #include <linux/msi.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/soc/ti/ti_sci_inta_msi.h>
 #include <linux/soc/ti/ti_sci_protocol.h>
index 1186f1e..c027cd9 100644 (file)
@@ -12,9 +12,9 @@
 #include <linux/io.h>
 #include <linux/irqchip.h>
 #include <linux/irqdomain.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/soc/ti/ti_sci_protocol.h>
 
 /**
index 716b1bb..601f934 100644 (file)
@@ -12,7 +12,6 @@
 #include <linux/irqdomain.h>
 #include <linux/kernel.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
index ab12328..0c18d1f 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/irqdomain.h>
 #include <linux/irq.h>
 #include <linux/irqchip.h>
+#include <linux/irqchip/xtensa-pic.h>
 #include <linux/of.h>
 
 unsigned int cached_irq_mask;
index 7899607..1eeb0d0 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <linux/acpi.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/of_irq.h>
 #include <linux/irqchip.h>
 #include <linux/platform_device.h>
index d96916c..a32c0d2 100644 (file)
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/soc/qcom/irq.h>
 #include <linux/spinlock.h>
index c0331b2..fe391de 100644 (file)
@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
                *z1t = cpu_to_le16(new_z1);     /* now send data */
                if (bch->tx_idx < bch->tx_skb->len)
                        return;
-               dev_kfree_skb(bch->tx_skb);
+               dev_kfree_skb_any(bch->tx_skb);
                if (get_next_bframe(bch))
                        goto next_t_frame;
                return;
@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
        }
        bz->za[new_f1].z1 = cpu_to_le16(new_z1);        /* for next buffer */
        bz->f1 = new_f1;        /* next frame */
-       dev_kfree_skb(bch->tx_skb);
+       dev_kfree_skb_any(bch->tx_skb);
        get_next_bframe(bch);
 }
 
@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
        if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
                hfcpci_fill_fifo(bch);
        else {
-               dev_kfree_skb(bch->tx_skb);
+               dev_kfree_skb_any(bch->tx_skb);
                if (get_next_bframe(bch))
                        hfcpci_fill_fifo(bch);
        }
@@ -2277,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
                return 0;
 
        if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
-               spin_lock(&hc->lock);
+               spin_lock_irq(&hc->lock);
                bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
                if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
                        main_rec_hfcpci(bch);
@@ -2288,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
                        main_rec_hfcpci(bch);
                        tx_birq(bch);
                }
-               spin_unlock(&hc->lock);
+               spin_unlock_irq(&hc->lock);
        }
        return 0;
 }
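
Two independent fixes share this driver. dev_kfree_skb_any() is the context-agnostic free: it checks in_hardirq()/irqs_disabled() and defers to the IRQ-safe variant when necessary, so it works whether the B-channel path runs in process or interrupt context. And the softirq helper now takes spin_lock_irq() because the same lock is taken from the card's hard interrupt handler. A compressed sketch of both rules:

#include <linux/skbuff.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);

static void my_tx_complete(struct sk_buff *skb)
{
        /* Safe from any context, unlike plain dev_kfree_skb(). */
        dev_kfree_skb_any(skb);
}

static void my_poll(void)
{
        /* Disable local IRQs: the hardirq handler takes my_lock too. */
        spin_lock_irq(&my_lock);
        /* ... rx/tx processing shared with the interrupt path ... */
        spin_unlock_irq(&my_lock);
}
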
index fa09d51..baf3125 100644 (file)
@@ -247,7 +247,7 @@ extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
 extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
 extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
 extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_send(void *arg);
+extern void dsp_cmx_send(struct timer_list *arg);
 extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
 extern int dsp_cmx_del_conf_member(struct dsp *dsp);
 extern int dsp_cmx_del_conf(struct dsp_conf *conf);
index 357b875..61cb45c 100644 (file)
@@ -1614,7 +1614,7 @@ static u16        dsp_count; /* last sample count */
 static int     dsp_count_valid; /* if we have last sample count */
 
 void
-dsp_cmx_send(void *arg)
+dsp_cmx_send(struct timer_list *arg)
 {
        struct dsp_conf *conf;
        struct dsp_conf_member *member;
index 3860845..fae95f1 100644 (file)
@@ -1195,7 +1195,7 @@ static int __init dsp_init(void)
        }
 
        /* set sample timer */
-       timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
+       timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
        dsp_spl_tl.expires = jiffies + dsp_tics;
        dsp_spl_jiffies = dsp_spl_tl.expires;
        add_timer(&dsp_spl_tl);
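
Dropping the (void *) cast matters beyond tidiness: a timer callback must genuinely have the void (*)(struct timer_list *) prototype, because kernels built with Control Flow Integrity verify the indirect call's type at runtime. The usual shape, with the owning structure recovered via from_timer():

#include <linux/jiffies.h>
#include <linux/timer.h>

struct my_state {
        struct timer_list timer;
        int ticks;
};

static void my_timer_fn(struct timer_list *t)
{
        /* Recover the embedding structure from the timer_list pointer. */
        struct my_state *s = from_timer(s, t, timer);

        s->ticks++;
        mod_timer(&s->timer, jiffies + HZ);
}

static void my_start(struct my_state *s)
{
        timer_setup(&s->timer, my_timer_fn, 0);
        mod_timer(&s->timer, jiffies + HZ);
}
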
index c9bc5a9..03c58e5 100644 (file)
@@ -406,15 +406,15 @@ static ssize_t interval_store(struct device *dev,
 
 static DEVICE_ATTR_RW(interval);
 
-static ssize_t hw_control_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
+static ssize_t offloaded_show(struct device *dev,
+                             struct device_attribute *attr, char *buf)
 {
        struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
 
        return sprintf(buf, "%d\n", trigger_data->hw_control);
 }
 
-static DEVICE_ATTR_RO(hw_control);
+static DEVICE_ATTR_RO(offloaded);
 
 static struct attribute *netdev_trig_attrs[] = {
        &dev_attr_device_name.attr,
@@ -427,7 +427,7 @@ static struct attribute *netdev_trig_attrs[] = {
        &dev_attr_rx.attr,
        &dev_attr_tx.attr,
        &dev_attr_interval.attr,
-       &dev_attr_hw_control.attr,
+       &dev_attr_offloaded.attr,
        NULL
 };
 ATTRIBUTE_GROUPS(netdev_trig);
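
The rename works because DEVICE_ATTR_RO(name) derives everything from its argument: it declares dev_attr_<name>, wires it to <name>_show(), and uses <name> as the sysfs file name, so renaming the show function together with the macro argument is exactly what renames the user-visible file from hw_control to offloaded. Schematically:

#include <linux/device.h>
#include <linux/sysfs.h>

static ssize_t offloaded_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
{
        /* sysfs_emit() is the preferred way to fill a sysfs buffer. */
        return sysfs_emit(buf, "%d\n", 1);
}
/* Declares dev_attr_offloaded, backing a file named "offloaded". */
static DEVICE_ATTR_RO(offloaded);
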
index 493a871..8bd2ad7 100644 (file)
@@ -857,7 +857,13 @@ struct smq_policy {
 
        struct background_tracker *bg_work;
 
-       bool migrations_allowed;
+       bool migrations_allowed:1;
+
+       /*
+        * If this is set the policy will try and clean the whole cache
+        * even if the device is not idle.
+        */
+       bool cleaner:1;
 };
 
 /*----------------------------------------------------------------*/
@@ -1138,7 +1144,7 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
         * Cache entries may not be populated.  So we cannot rely on the
         * size of the clean queue.
         */
-       if (idle) {
+       if (idle || mq->cleaner) {
                /*
                 * We'd like to clean everything.
                 */
@@ -1722,11 +1728,9 @@ static void calc_hotspot_params(sector_t origin_size,
                *hotspot_block_size /= 2u;
 }
 
-static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
-                                           sector_t origin_size,
-                                           sector_t cache_block_size,
-                                           bool mimic_mq,
-                                           bool migrations_allowed)
+static struct dm_cache_policy *
+__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size,
+            bool mimic_mq, bool migrations_allowed, bool cleaner)
 {
        unsigned int i;
        unsigned int nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
@@ -1813,6 +1817,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
                goto bad_btracker;
 
        mq->migrations_allowed = migrations_allowed;
+       mq->cleaner = cleaner;
 
        return &mq->policy;
 
@@ -1836,21 +1841,24 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
                                          sector_t origin_size,
                                          sector_t cache_block_size)
 {
-       return __smq_create(cache_size, origin_size, cache_block_size, false, true);
+       return __smq_create(cache_size, origin_size, cache_block_size,
+                           false, true, false);
 }
 
 static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
                                         sector_t origin_size,
                                         sector_t cache_block_size)
 {
-       return __smq_create(cache_size, origin_size, cache_block_size, true, true);
+       return __smq_create(cache_size, origin_size, cache_block_size,
+                           true, true, false);
 }
 
 static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
                                              sector_t origin_size,
                                              sector_t cache_block_size)
 {
-       return __smq_create(cache_size, origin_size, cache_block_size, false, false);
+       return __smq_create(cache_size, origin_size, cache_block_size,
+                           false, false, true);
 }
 
 /*----------------------------------------------------------------*/
index 3d5c56e..97a8d5f 100644 (file)
@@ -2676,6 +2676,7 @@ oom:
        recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO);
        if (!recalc_tags) {
                vfree(recalc_buffer);
+               recalc_buffer = NULL;
                goto oom;
        }
 
index 8846bf5..becdb68 100644 (file)
@@ -3251,8 +3251,7 @@ size_check:
        r = md_start(&rs->md);
        if (r) {
                ti->error = "Failed to start raid array";
-               mddev_unlock(&rs->md);
-               goto bad_md_start;
+               goto bad_unlock;
        }
 
        /* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
@@ -3260,8 +3259,7 @@ size_check:
                r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
                if (r) {
                        ti->error = "Failed to set raid4/5/6 journal mode";
-                       mddev_unlock(&rs->md);
-                       goto bad_journal_mode_set;
+                       goto bad_unlock;
                }
        }
 
@@ -3272,14 +3270,14 @@ size_check:
        if (rs_is_raid456(rs)) {
                r = rs_set_raid456_stripe_cache(rs);
                if (r)
-                       goto bad_stripe_cache;
+                       goto bad_unlock;
        }
 
        /* Now do an early reshape check */
        if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
                r = rs_check_reshape(rs);
                if (r)
-                       goto bad_check_reshape;
+                       goto bad_unlock;
 
                /* Restore new, ctr requested layout to perform check */
                rs_config_restore(rs, &rs_layout);
@@ -3288,7 +3286,7 @@ size_check:
                        r = rs->md.pers->check_reshape(&rs->md);
                        if (r) {
                                ti->error = "Reshape check failed";
-                               goto bad_check_reshape;
+                               goto bad_unlock;
                        }
                }
        }
@@ -3299,11 +3297,9 @@ size_check:
        mddev_unlock(&rs->md);
        return 0;
 
-bad_md_start:
-bad_journal_mode_set:
-bad_stripe_cache:
-bad_check_reshape:
+bad_unlock:
        md_stop(&rs->md);
+       mddev_unlock(&rs->md);
 bad:
        raid_set_free(rs);
 
@@ -3314,7 +3310,9 @@ static void raid_dtr(struct dm_target *ti)
 {
        struct raid_set *rs = ti->private;
 
+       mddev_lock_nointr(&rs->md);
        md_stop(&rs->md);
+       mddev_unlock(&rs->md);
        raid_set_free(rs);
 }
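
The ctr() rework collapses four labels that all did the same thing into one, and moves the unlock after md_stop() so every failure past the lock acquisition unwinds identically. The general single-label shape, with stub functions standing in for the md/dm-raid calls:

/* Stubs standing in for mddev_lock()/md_start()/md_stop() and friends. */
static void my_lock(void) { }
static void my_unlock(void) { }
static void my_stop(void) { }
static int step_one(void) { return 0; }
static int step_two(void) { return 0; }

static int my_ctr(void)
{
        int r;

        my_lock();

        r = step_one();
        if (r)
                goto bad_unlock;

        r = step_two();
        if (r)
                goto bad_unlock;

        my_unlock();
        return 0;

bad_unlock:
        /* One label replaces a stack of bad_* labels; note the stop
         * happens before the unlock, matching the new ordering above. */
        my_stop();
        my_unlock();
        return r;
}
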
 
index 2e38ef4..78be781 100644 (file)
@@ -6247,6 +6247,8 @@ static void __md_stop(struct mddev *mddev)
 
 void md_stop(struct mddev *mddev)
 {
+       lockdep_assert_held(&mddev->reconfig_mutex);
+
        /* stop the array and free any attached data structures.
         * This is called from dm-raid
         */
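
lockdep_assert_held() turns the comment "the caller must hold the lock" into something checkable: under CONFIG_PROVE_LOCKING it splats when the lock is not held, and it compiles away otherwise, which is why raid_dtr() above now wraps md_stop() in the mddev lock. In miniature:

#include <linux/lockdep.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(my_mutex);

static void my_stop(void)
{
        /* Checked under CONFIG_PROVE_LOCKING, free otherwise. */
        lockdep_assert_held(&my_mutex);
        /* ... teardown that relies on my_mutex being held ... */
}

static void my_teardown(void)
{
        mutex_lock(&my_mutex);
        my_stop();
        mutex_unlock(&my_mutex);
}
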
index 04b13cd..ba67587 100644 (file)
@@ -809,8 +809,11 @@ static void pulse8_ping_eeprom_work_handler(struct work_struct *work)
 
        mutex_lock(&pulse8->lock);
        cmd = MSGCODE_PING;
-       pulse8_send_and_wait(pulse8, &cmd, 1,
-                            MSGCODE_COMMAND_ACCEPTED, 0);
+       if (pulse8_send_and_wait(pulse8, &cmd, 1,
+                                MSGCODE_COMMAND_ACCEPTED, 0)) {
+               dev_warn(pulse8->dev, "failed to ping EEPROM\n");
+               goto unlock;
+       }
 
        if (pulse8->vers < 2)
                goto unlock;
index e9b2d90..3f7e147 100644 (file)
@@ -813,8 +813,8 @@ static unsigned long tc358746_find_pll_settings(struct tc358746 *tc358746,
        u32 min_delta = 0xffffffff;
        u16 prediv_max = 17;
        u16 prediv_min = 1;
-       u16 m_best, mul;
-       u16 p_best, p;
+       u16 m_best = 0, mul;
+       u16 p_best = 1, p;
        u8 postdiv;
 
        if (fout > 1000 * HZ_PER_MHZ) {
index 8fd5b6e..7551ca4 100644 (file)
@@ -2459,16 +2459,10 @@ static int dvb_register(struct cx23885_tsport *port)
                        request_module("%s", info.type);
                        client_tuner = i2c_new_client_device(&dev->i2c_bus[1].i2c_adap, &info);
                        if (!i2c_client_has_driver(client_tuner)) {
-                               module_put(client_demod->dev.driver->owner);
-                               i2c_unregister_device(client_demod);
-                               port->i2c_client_demod = NULL;
                                goto frontend_detach;
                        }
                        if (!try_module_get(client_tuner->dev.driver->owner)) {
                                i2c_unregister_device(client_tuner);
-                               module_put(client_demod->dev.driver->owner);
-                               i2c_unregister_device(client_demod);
-                               port->i2c_client_demod = NULL;
                                goto frontend_detach;
                        }
                        port->i2c_client_tuner = client_tuner;
@@ -2505,16 +2499,10 @@ static int dvb_register(struct cx23885_tsport *port)
                        request_module("%s", info.type);
                        client_tuner = i2c_new_client_device(&dev->i2c_bus[1].i2c_adap, &info);
                        if (!i2c_client_has_driver(client_tuner)) {
-                               module_put(client_demod->dev.driver->owner);
-                               i2c_unregister_device(client_demod);
-                               port->i2c_client_demod = NULL;
                                goto frontend_detach;
                        }
                        if (!try_module_get(client_tuner->dev.driver->owner)) {
                                i2c_unregister_device(client_tuner);
-                               module_put(client_demod->dev.driver->owner);
-                               i2c_unregister_device(client_demod);
-                               port->i2c_client_demod = NULL;
                                goto frontend_detach;
                        }
                        port->i2c_client_tuner = client_tuner;
index 43d85a5..7863b7b 100644 (file)
@@ -826,7 +826,7 @@ static const struct dev_pm_ops vpu_core_pm_ops = {
 
 static struct vpu_core_resources imx8q_enc = {
        .type = VPU_CORE_TYPE_ENC,
-       .fwname = "vpu/vpu_fw_imx8_enc.bin",
+       .fwname = "amphion/vpu/vpu_fw_imx8_enc.bin",
        .stride = 16,
        .max_width = 1920,
        .max_height = 1920,
@@ -841,7 +841,7 @@ static struct vpu_core_resources imx8q_enc = {
 
 static struct vpu_core_resources imx8q_dec = {
        .type = VPU_CORE_TYPE_DEC,
-       .fwname = "vpu/vpu_fw_imx8_dec.bin",
+       .fwname = "amphion/vpu/vpu_fw_imx8_dec.bin",
        .stride = 256,
        .max_width = 8188,
        .max_height = 8188,
index bf759eb..b6d5b48 100644 (file)
@@ -46,11 +46,10 @@ static int vpu_mbox_request_channel(struct device *dev, struct vpu_mbox *mbox)
        cl->rx_callback = vpu_mbox_rx_callback;
 
        ch = mbox_request_channel_byname(cl, mbox->name);
-       if (IS_ERR(ch)) {
-               dev_err(dev, "Failed to request mbox chan %s, ret : %ld\n",
-                       mbox->name, PTR_ERR(ch));
-               return PTR_ERR(ch);
-       }
+       if (IS_ERR(ch))
+               return dev_err_probe(dev, PTR_ERR(ch),
+                                    "Failed to request mbox chan %s\n",
+                                    mbox->name);
 
        mbox->ch = ch;
        return 0;
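
dev_err_probe() earns its keep on probe paths: it returns the error it was given, logs real failures at err level, and stays quiet for -EPROBE_DEFER, recording the deferral reason in the devices_deferred debugfs file instead. A sketch, with a stand-in for the failing request:

#include <linux/device.h>
#include <linux/err.h>

static int my_request(struct device *dev)
{
        void *handle = ERR_PTR(-EPROBE_DEFER); /* stand-in for a real request */

        if (IS_ERR(handle))
                return dev_err_probe(dev, PTR_ERR(handle),
                                     "failed to acquire resource\n");
        return 0;
}
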
index 4768156..60425c9 100644 (file)
@@ -28,7 +28,6 @@
 #include "mtk_jpeg_core.h"
 #include "mtk_jpeg_dec_parse.h"
 
-#if defined(CONFIG_OF)
 static struct mtk_jpeg_fmt mtk_jpeg_enc_formats[] = {
        {
                .fourcc         = V4L2_PIX_FMT_JPEG,
@@ -102,7 +101,6 @@ static struct mtk_jpeg_fmt mtk_jpeg_dec_formats[] = {
                .flags          = MTK_JPEG_FMT_FLAG_CAPTURE,
        },
 };
-#endif
 
 #define MTK_JPEG_ENC_NUM_FORMATS ARRAY_SIZE(mtk_jpeg_enc_formats)
 #define MTK_JPEG_DEC_NUM_FORMATS ARRAY_SIZE(mtk_jpeg_dec_formats)
@@ -1312,6 +1310,8 @@ static int mtk_jpeg_probe(struct platform_device *pdev)
        jpeg->dev = &pdev->dev;
        jpeg->variant = of_device_get_match_data(jpeg->dev);
 
+       platform_set_drvdata(pdev, jpeg);
+
        ret = devm_of_platform_populate(&pdev->dev);
        if (ret) {
                v4l2_err(&jpeg->v4l2_dev, "Master of platform populate failed.");
@@ -1383,8 +1383,6 @@ static int mtk_jpeg_probe(struct platform_device *pdev)
                  jpeg->variant->dev_name, jpeg->vdev->num,
                  VIDEO_MAJOR, jpeg->vdev->minor);
 
-       platform_set_drvdata(pdev, jpeg);
-
        pm_runtime_enable(&pdev->dev);
 
        return 0;
@@ -1455,7 +1453,6 @@ static const struct dev_pm_ops mtk_jpeg_pm_ops = {
        SET_RUNTIME_PM_OPS(mtk_jpeg_pm_suspend, mtk_jpeg_pm_resume, NULL)
 };
 
-#if defined(CONFIG_OF)
 static int mtk_jpegenc_get_hw(struct mtk_jpeg_ctx *ctx)
 {
        struct mtk_jpegenc_comp_dev *comp_jpeg;
@@ -1951,14 +1948,13 @@ static const struct of_device_id mtk_jpeg_match[] = {
 };
 
 MODULE_DEVICE_TABLE(of, mtk_jpeg_match);
-#endif
 
 static struct platform_driver mtk_jpeg_driver = {
        .probe = mtk_jpeg_probe,
        .remove_new = mtk_jpeg_remove,
        .driver = {
                .name           = MTK_JPEG_NAME,
-               .of_match_table = of_match_ptr(mtk_jpeg_match),
+               .of_match_table = mtk_jpeg_match,
                .pm             = &mtk_jpeg_pm_ops,
        },
 };
index 869068f..baa7be5 100644 (file)
@@ -39,7 +39,6 @@ enum mtk_jpeg_color {
        MTK_JPEG_COLOR_400              = 0x00110000
 };
 
-#if defined(CONFIG_OF)
 static const struct of_device_id mtk_jpegdec_hw_ids[] = {
        {
                .compatible = "mediatek,mt8195-jpgdec-hw",
@@ -47,7 +46,6 @@ static const struct of_device_id mtk_jpegdec_hw_ids[] = {
        {},
 };
 MODULE_DEVICE_TABLE(of, mtk_jpegdec_hw_ids);
-#endif
 
 static inline int mtk_jpeg_verify_align(u32 val, int align, u32 reg)
 {
@@ -653,7 +651,7 @@ static struct platform_driver mtk_jpegdec_hw_driver = {
        .probe = mtk_jpegdec_hw_probe,
        .driver = {
                .name = "mtk-jpegdec-hw",
-               .of_match_table = of_match_ptr(mtk_jpegdec_hw_ids),
+               .of_match_table = mtk_jpegdec_hw_ids,
        },
 };
 
index 71e85b4..2440183 100644 (file)
@@ -46,7 +46,6 @@ static const struct mtk_jpeg_enc_qlt mtk_jpeg_enc_quality[] = {
        {.quality_param = 97, .hardware_value = JPEG_ENC_QUALITY_Q97},
 };
 
-#if defined(CONFIG_OF)
 static const struct of_device_id mtk_jpegenc_drv_ids[] = {
        {
                .compatible = "mediatek,mt8195-jpgenc-hw",
@@ -54,7 +53,6 @@ static const struct of_device_id mtk_jpegenc_drv_ids[] = {
        {},
 };
 MODULE_DEVICE_TABLE(of, mtk_jpegenc_drv_ids);
-#endif
 
 void mtk_jpeg_enc_reset(void __iomem *base)
 {
@@ -377,7 +375,7 @@ static struct platform_driver mtk_jpegenc_hw_driver = {
        .probe = mtk_jpegenc_hw_probe,
        .driver = {
                .name = "mtk-jpegenc-hw",
-               .of_match_table = of_match_ptr(mtk_jpegenc_drv_ids),
+               .of_match_table = mtk_jpegenc_drv_ids,
        },
 };
 
index 9ff439a..315e97a 100644 (file)
@@ -821,6 +821,8 @@ static int vb2ops_venc_queue_setup(struct vb2_queue *vq,
                return -EINVAL;
 
        if (*nplanes) {
+               if (*nplanes != q_data->fmt->num_planes)
+                       return -EINVAL;
                for (i = 0; i < *nplanes; i++)
                        if (sizes[i] < q_data->sizeimage[i])
                                return -EINVAL;
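
The added check enforces the vb2 queue_setup() contract: when *nplanes is nonzero the call comes from VIDIOC_CREATE_BUFS with caller-supplied sizes, and the driver must validate the plane count as well as each size; when it is zero the driver fills in its own requirements. A hedged sketch of the two paths, with hypothetical plane constants:

#include <media/videobuf2-core.h>

#define MY_NUM_PLANES 2
static const unsigned int my_min_size[MY_NUM_PLANES] = { 4096, 2048 };

static int my_queue_setup(struct vb2_queue *vq,
                          unsigned int *nbuffers, unsigned int *nplanes,
                          unsigned int sizes[], struct device *alloc_devs[])
{
        unsigned int i;

        if (*nplanes) {
                /* CREATE_BUFS: validate what userspace passed in. */
                if (*nplanes != MY_NUM_PLANES)
                        return -EINVAL;
                for (i = 0; i < *nplanes; i++)
                        if (sizes[i] < my_min_size[i])
                                return -EINVAL;
                return 0;
        }

        /* REQBUFS: report the driver's own requirements. */
        *nplanes = MY_NUM_PLANES;
        for (i = 0; i < MY_NUM_PLANES; i++)
                sizes[i] = my_min_size[i];
        return 0;
}
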
index f555341..04e6dc6 100644 (file)
@@ -233,7 +233,8 @@ void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue,
                kfree(lat_buf->private_data);
        }
 
-       cancel_work_sync(&msg_queue->core_work);
+       if (msg_queue->wdma_addr.size)
+               cancel_work_sync(&msg_queue->core_work);
 }
 
 static void vdec_msg_queue_core_work(struct work_struct *work)
index ed15ea3..a2b4fb9 100644 (file)
@@ -58,7 +58,6 @@
 #define CAST_OFBSIZE_LO                        CAST_STATUS18
 #define CAST_OFBSIZE_HI                        CAST_STATUS19
 
-#define MXC_MAX_SLOTS  1 /* TODO use all 4 slots*/
 /* JPEG-Decoder Wrapper Slot Registers 0..3 */
 #define SLOT_BASE                      0x10000
 #define SLOT_STATUS                    0x0
index c0e49be..9512c0a 100644 (file)
@@ -745,87 +745,77 @@ static void notify_src_chg(struct mxc_jpeg_ctx *ctx)
        v4l2_event_queue_fh(&ctx->fh, &ev);
 }
 
-static int mxc_get_free_slot(struct mxc_jpeg_slot_data slot_data[], int n)
+static int mxc_get_free_slot(struct mxc_jpeg_slot_data *slot_data)
 {
-       int free_slot = 0;
-
-       while (slot_data[free_slot].used && free_slot < n)
-               free_slot++;
-
-       return free_slot; /* >=n when there are no more free slots */
+       if (!slot_data->used)
+               return slot_data->slot;
+       return -1;
 }
 
-static bool mxc_jpeg_alloc_slot_data(struct mxc_jpeg_dev *jpeg,
-                                    unsigned int slot)
+static bool mxc_jpeg_alloc_slot_data(struct mxc_jpeg_dev *jpeg)
 {
        struct mxc_jpeg_desc *desc;
        struct mxc_jpeg_desc *cfg_desc;
        void *cfg_stm;
 
-       if (jpeg->slot_data[slot].desc)
+       if (jpeg->slot_data.desc)
                goto skip_alloc; /* already allocated, reuse it */
 
        /* allocate descriptor for decoding/encoding phase */
        desc = dma_alloc_coherent(jpeg->dev,
                                  sizeof(struct mxc_jpeg_desc),
-                                 &jpeg->slot_data[slot].desc_handle,
+                                 &jpeg->slot_data.desc_handle,
                                  GFP_ATOMIC);
        if (!desc)
                goto err;
-       jpeg->slot_data[slot].desc = desc;
+       jpeg->slot_data.desc = desc;
 
        /* allocate descriptor for configuration phase (encoder only) */
        cfg_desc = dma_alloc_coherent(jpeg->dev,
                                      sizeof(struct mxc_jpeg_desc),
-                                     &jpeg->slot_data[slot].cfg_desc_handle,
+                                     &jpeg->slot_data.cfg_desc_handle,
                                      GFP_ATOMIC);
        if (!cfg_desc)
                goto err;
-       jpeg->slot_data[slot].cfg_desc = cfg_desc;
+       jpeg->slot_data.cfg_desc = cfg_desc;
 
        /* allocate configuration stream */
        cfg_stm = dma_alloc_coherent(jpeg->dev,
                                     MXC_JPEG_MAX_CFG_STREAM,
-                                    &jpeg->slot_data[slot].cfg_stream_handle,
+                                    &jpeg->slot_data.cfg_stream_handle,
                                     GFP_ATOMIC);
        if (!cfg_stm)
                goto err;
-       jpeg->slot_data[slot].cfg_stream_vaddr = cfg_stm;
+       jpeg->slot_data.cfg_stream_vaddr = cfg_stm;
 
 skip_alloc:
-       jpeg->slot_data[slot].used = true;
+       jpeg->slot_data.used = true;
 
        return true;
 err:
-       dev_err(jpeg->dev, "Could not allocate descriptors for slot %d", slot);
+       dev_err(jpeg->dev, "Could not allocate descriptors for slot %d", jpeg->slot_data.slot);
 
        return false;
 }
 
-static void mxc_jpeg_free_slot_data(struct mxc_jpeg_dev *jpeg,
-                                   unsigned int slot)
+static void mxc_jpeg_free_slot_data(struct mxc_jpeg_dev *jpeg)
 {
-       if (slot >= MXC_MAX_SLOTS) {
-               dev_err(jpeg->dev, "Invalid slot %d, nothing to free.", slot);
-               return;
-       }
-
        /* free descriptor for decoding/encoding phase */
        dma_free_coherent(jpeg->dev, sizeof(struct mxc_jpeg_desc),
-                         jpeg->slot_data[slot].desc,
-                         jpeg->slot_data[slot].desc_handle);
+                         jpeg->slot_data.desc,
+                         jpeg->slot_data.desc_handle);
 
        /* free descriptor for encoder configuration phase / decoder DHT */
        dma_free_coherent(jpeg->dev, sizeof(struct mxc_jpeg_desc),
-                         jpeg->slot_data[slot].cfg_desc,
-                         jpeg->slot_data[slot].cfg_desc_handle);
+                         jpeg->slot_data.cfg_desc,
+                         jpeg->slot_data.cfg_desc_handle);
 
        /* free configuration stream */
        dma_free_coherent(jpeg->dev, MXC_JPEG_MAX_CFG_STREAM,
-                         jpeg->slot_data[slot].cfg_stream_vaddr,
-                         jpeg->slot_data[slot].cfg_stream_handle);
+                         jpeg->slot_data.cfg_stream_vaddr,
+                         jpeg->slot_data.cfg_stream_handle);
 
-       jpeg->slot_data[slot].used = false;
+       jpeg->slot_data.used = false;
 }
 
 static void mxc_jpeg_check_and_set_last_buffer(struct mxc_jpeg_ctx *ctx,
@@ -855,7 +845,7 @@ static void mxc_jpeg_job_finish(struct mxc_jpeg_ctx *ctx, enum vb2_buffer_state
        v4l2_m2m_buf_done(dst_buf, state);
 
        mxc_jpeg_disable_irq(reg, ctx->slot);
-       ctx->mxc_jpeg->slot_data[ctx->slot].used = false;
+       jpeg->slot_data.used = false;
        if (reset)
                mxc_jpeg_sw_reset(reg);
 }
@@ -919,7 +909,7 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
                goto job_unlock;
        }
 
-       if (!jpeg->slot_data[slot].used)
+       if (!jpeg->slot_data.used)
                goto job_unlock;
 
        dec_ret = readl(reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS));
@@ -1179,13 +1169,13 @@ static void mxc_jpeg_config_dec_desc(struct vb2_buffer *out_buf,
        struct mxc_jpeg_dev *jpeg = ctx->mxc_jpeg;
        void __iomem *reg = jpeg->base_reg;
        unsigned int slot = ctx->slot;
-       struct mxc_jpeg_desc *desc = jpeg->slot_data[slot].desc;
-       struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data[slot].cfg_desc;
-       dma_addr_t desc_handle = jpeg->slot_data[slot].desc_handle;
-       dma_addr_t cfg_desc_handle = jpeg->slot_data[slot].cfg_desc_handle;
-       dma_addr_t cfg_stream_handle = jpeg->slot_data[slot].cfg_stream_handle;
-       unsigned int *cfg_size = &jpeg->slot_data[slot].cfg_stream_size;
-       void *cfg_stream_vaddr = jpeg->slot_data[slot].cfg_stream_vaddr;
+       struct mxc_jpeg_desc *desc = jpeg->slot_data.desc;
+       struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data.cfg_desc;
+       dma_addr_t desc_handle = jpeg->slot_data.desc_handle;
+       dma_addr_t cfg_desc_handle = jpeg->slot_data.cfg_desc_handle;
+       dma_addr_t cfg_stream_handle = jpeg->slot_data.cfg_stream_handle;
+       unsigned int *cfg_size = &jpeg->slot_data.cfg_stream_size;
+       void *cfg_stream_vaddr = jpeg->slot_data.cfg_stream_vaddr;
        struct mxc_jpeg_src_buf *jpeg_src_buf;
 
        jpeg_src_buf = vb2_to_mxc_buf(src_buf);
@@ -1245,18 +1235,18 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf,
        struct mxc_jpeg_dev *jpeg = ctx->mxc_jpeg;
        void __iomem *reg = jpeg->base_reg;
        unsigned int slot = ctx->slot;
-       struct mxc_jpeg_desc *desc = jpeg->slot_data[slot].desc;
-       struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data[slot].cfg_desc;
-       dma_addr_t desc_handle = jpeg->slot_data[slot].desc_handle;
-       dma_addr_t cfg_desc_handle = jpeg->slot_data[slot].cfg_desc_handle;
-       void *cfg_stream_vaddr = jpeg->slot_data[slot].cfg_stream_vaddr;
+       struct mxc_jpeg_desc *desc = jpeg->slot_data.desc;
+       struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data.cfg_desc;
+       dma_addr_t desc_handle = jpeg->slot_data.desc_handle;
+       dma_addr_t cfg_desc_handle = jpeg->slot_data.cfg_desc_handle;
+       void *cfg_stream_vaddr = jpeg->slot_data.cfg_stream_vaddr;
        struct mxc_jpeg_q_data *q_data;
        enum mxc_jpeg_image_format img_fmt;
        int w, h;
 
        q_data = mxc_jpeg_get_q_data(ctx, src_buf->vb2_queue->type);
 
-       jpeg->slot_data[slot].cfg_stream_size =
+       jpeg->slot_data.cfg_stream_size =
                        mxc_jpeg_setup_cfg_stream(cfg_stream_vaddr,
                                                  q_data->fmt->fourcc,
                                                  q_data->crop.width,
@@ -1265,7 +1255,7 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf,
        /* chain the config descriptor with the encoding descriptor */
        cfg_desc->next_descpt_ptr = desc_handle | MXC_NXT_DESCPT_EN;
 
-       cfg_desc->buf_base0 = jpeg->slot_data[slot].cfg_stream_handle;
+       cfg_desc->buf_base0 = jpeg->slot_data.cfg_stream_handle;
        cfg_desc->buf_base1 = 0;
        cfg_desc->line_pitch = 0;
        cfg_desc->stm_bufbase = 0; /* no output expected */
@@ -1408,7 +1398,7 @@ static void mxc_jpeg_device_run_timeout(struct work_struct *work)
        unsigned long flags;
 
        spin_lock_irqsave(&ctx->mxc_jpeg->hw_lock, flags);
-       if (ctx->slot < MXC_MAX_SLOTS && ctx->mxc_jpeg->slot_data[ctx->slot].used) {
+       if (ctx->mxc_jpeg->slot_data.used) {
                dev_warn(jpeg->dev, "%s timeout, cancel it\n",
                         ctx->mxc_jpeg->mode == MXC_JPEG_DECODE ? "decode" : "encode");
                mxc_jpeg_job_finish(ctx, VB2_BUF_STATE_ERROR, true);
@@ -1476,12 +1466,12 @@ static void mxc_jpeg_device_run(void *priv)
        mxc_jpeg_enable(reg);
        mxc_jpeg_set_l_endian(reg, 1);
 
-       ctx->slot = mxc_get_free_slot(jpeg->slot_data, MXC_MAX_SLOTS);
-       if (ctx->slot >= MXC_MAX_SLOTS) {
+       ctx->slot = mxc_get_free_slot(&jpeg->slot_data);
+       if (ctx->slot < 0) {
                dev_err(dev, "No more free slots\n");
                goto end;
        }
-       if (!mxc_jpeg_alloc_slot_data(jpeg, ctx->slot)) {
+       if (!mxc_jpeg_alloc_slot_data(jpeg)) {
                dev_err(dev, "Cannot allocate slot data\n");
                goto end;
        }
@@ -2101,7 +2091,7 @@ static int mxc_jpeg_open(struct file *file)
        }
        ctx->fh.ctrl_handler = &ctx->ctrl_handler;
        mxc_jpeg_set_default_params(ctx);
-       ctx->slot = MXC_MAX_SLOTS; /* slot not allocated yet */
+       ctx->slot = -1; /* slot not allocated yet */
        INIT_DELAYED_WORK(&ctx->task_timer, mxc_jpeg_device_run_timeout);
 
        if (mxc_jpeg->mode == MXC_JPEG_DECODE)
@@ -2677,6 +2667,11 @@ static int mxc_jpeg_attach_pm_domains(struct mxc_jpeg_dev *jpeg)
                dev_err(dev, "No power domains defined for jpeg node\n");
                return jpeg->num_domains;
        }
+       if (jpeg->num_domains == 1) {
+               /* genpd_dev_pm_attach() attaches automatically when there is only one power domain */
+               jpeg->num_domains = 0;
+               return 0;
+       }
 
        jpeg->pd_dev = devm_kmalloc_array(dev, jpeg->num_domains,
                                          sizeof(*jpeg->pd_dev), GFP_KERNEL);
@@ -2718,7 +2713,6 @@ static int mxc_jpeg_probe(struct platform_device *pdev)
        int ret;
        int mode;
        const struct of_device_id *of_id;
-       unsigned int slot;
 
        of_id = of_match_node(mxc_jpeg_match, dev->of_node);
        if (!of_id)
@@ -2742,19 +2736,22 @@ static int mxc_jpeg_probe(struct platform_device *pdev)
        if (IS_ERR(jpeg->base_reg))
                return PTR_ERR(jpeg->base_reg);
 
-       for (slot = 0; slot < MXC_MAX_SLOTS; slot++) {
-               dec_irq = platform_get_irq(pdev, slot);
-               if (dec_irq < 0) {
-                       ret = dec_irq;
-                       goto err_irq;
-               }
-               ret = devm_request_irq(&pdev->dev, dec_irq, mxc_jpeg_dec_irq,
-                                      0, pdev->name, jpeg);
-               if (ret) {
-                       dev_err(&pdev->dev, "Failed to request irq %d (%d)\n",
-                               dec_irq, ret);
-                       goto err_irq;
-               }
+       ret = of_property_read_u32_index(pdev->dev.of_node, "slot", 0, &jpeg->slot_data.slot);
+       if (ret)
+               jpeg->slot_data.slot = 0;
+       dev_info(&pdev->dev, "using slot %d\n", jpeg->slot_data.slot);
+       dec_irq = platform_get_irq(pdev, 0);
+       if (dec_irq < 0) {
+               dev_err(&pdev->dev, "Failed to get irq %d\n", dec_irq);
+               ret = dec_irq;
+               goto err_irq;
+       }
+       ret = devm_request_irq(&pdev->dev, dec_irq, mxc_jpeg_dec_irq,
+                              0, pdev->name, jpeg);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to request irq %d (%d)\n",
+                       dec_irq, ret);
+               goto err_irq;
        }
 
        jpeg->pdev = pdev;
@@ -2914,11 +2911,9 @@ static const struct dev_pm_ops   mxc_jpeg_pm_ops = {
 
 static void mxc_jpeg_remove(struct platform_device *pdev)
 {
-       unsigned int slot;
        struct mxc_jpeg_dev *jpeg = platform_get_drvdata(pdev);
 
-       for (slot = 0; slot < MXC_MAX_SLOTS; slot++)
-               mxc_jpeg_free_slot_data(jpeg, slot);
+       mxc_jpeg_free_slot_data(jpeg);
 
        pm_runtime_disable(&pdev->dev);
        video_unregister_device(jpeg->dec_vdev);
index 87157db..d80e94c 100644 (file)
@@ -97,7 +97,7 @@ struct mxc_jpeg_ctx {
        struct mxc_jpeg_q_data          cap_q;
        struct v4l2_fh                  fh;
        enum mxc_jpeg_enc_state         enc_state;
-       unsigned int                    slot;
+       int                             slot;
        unsigned int                    source_change;
        bool                            header_parsed;
        struct v4l2_ctrl_handler        ctrl_handler;
@@ -106,6 +106,7 @@ struct mxc_jpeg_ctx {
 };
 
 struct mxc_jpeg_slot_data {
+       int slot;
        bool used;
        struct mxc_jpeg_desc *desc; // enc/dec descriptor
        struct mxc_jpeg_desc *cfg_desc; // configuration descriptor
@@ -128,7 +129,7 @@ struct mxc_jpeg_dev {
        struct v4l2_device              v4l2_dev;
        struct v4l2_m2m_dev             *m2m_dev;
        struct video_device             *dec_vdev;
-       struct mxc_jpeg_slot_data       slot_data[MXC_MAX_SLOTS];
+       struct mxc_jpeg_slot_data       slot_data;
        int                             num_domains;
        struct device                   **pd_dev;
        struct device_link              **pd_link;
index 0bd2613..791bde6 100644 (file)
@@ -9,7 +9,9 @@
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/math.h>
 #include <linux/mfd/syscon.h>
+#include <linux/minmax.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
@@ -1137,8 +1139,9 @@ __imx7_csi_video_try_fmt(struct v4l2_pix_format *pixfmt,
         * TODO: Implement configurable stride support.
         */
        walign = 8 * 8 / cc->bpp;
-       v4l_bound_align_image(&pixfmt->width, 1, 0xffff, walign,
-                             &pixfmt->height, 1, 0xffff, 1, 0);
+       pixfmt->width = clamp(round_up(pixfmt->width, walign), walign,
+                             round_down(65535U, walign));
+       pixfmt->height = clamp(pixfmt->height, 1U, 65535U);
 
        pixfmt->bytesperline = pixfmt->width * cc->bpp / 8;
        pixfmt->sizeimage = pixfmt->bytesperline * pixfmt->height;
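
The v4l_bound_align_image() call is replaced by explicit clamp-and-align arithmetic; the subtle part is that round_up() can push a value past the limit, so the upper clamp bound must itself be aligned. A sketch, assuming walign is a power of two as in the driver:

#include <linux/math.h>
#include <linux/minmax.h>

/* Align @width to @walign (power of two) while keeping it within 16 bits. */
static unsigned int my_align_width(unsigned int width, unsigned int walign)
{
        return clamp(round_up(width, walign), walign,
                     round_down(65535U, walign));
}
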
index 7f0802a..3418d2d 100644 (file)
@@ -251,8 +251,8 @@ int pkt_session_unset_buffers(struct hfi_session_release_buffer_pkt *pkt,
 
                pkt->extradata_size = 0;
                pkt->shdr.hdr.size =
-                       struct_size((struct hfi_session_set_buffers_pkt *)0,
-                                   buffer_info, bd->num_buffers);
+                       struct_size_t(struct hfi_session_set_buffers_pkt,
+                                     buffer_info, bd->num_buffers);
        }
 
        pkt->response_req = bd->response_required;
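
struct_size_t() exists precisely for call sites like this one: struct_size() wants an object pointer to size a flexible-array allocation, and the old code faked one with a ((type *)0) cast; the _t variant takes the type itself while keeping the same overflow-checked arithmetic. A small sketch with a hypothetical packet type:

#include <linux/overflow.h>
#include <linux/types.h>

struct my_pkt {
        u32 hdr;
        u32 buffer_info[];      /* flexible array member */
};

static size_t my_pkt_size(size_t n)
{
        /* sizeof(struct my_pkt) + n * sizeof(u32), with overflow checks. */
        return struct_size_t(struct my_pkt, buffer_info, n);
}
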
index 6523ffb..77aee94 100644 (file)
@@ -370,26 +370,26 @@ extern int hantro_debug;
        pr_err("%s:%d: " fmt, __func__, __LINE__, ##args)
 
 /* Structure access helpers. */
-static inline struct hantro_ctx *fh_to_ctx(struct v4l2_fh *fh)
+static __always_inline struct hantro_ctx *fh_to_ctx(struct v4l2_fh *fh)
 {
        return container_of(fh, struct hantro_ctx, fh);
 }
 
 /* Register accessors. */
-static inline void vepu_write_relaxed(struct hantro_dev *vpu,
-                                     u32 val, u32 reg)
+static __always_inline void vepu_write_relaxed(struct hantro_dev *vpu,
+                                              u32 val, u32 reg)
 {
        vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
        writel_relaxed(val, vpu->enc_base + reg);
 }
 
-static inline void vepu_write(struct hantro_dev *vpu, u32 val, u32 reg)
+static __always_inline void vepu_write(struct hantro_dev *vpu, u32 val, u32 reg)
 {
        vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
        writel(val, vpu->enc_base + reg);
 }
 
-static inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
+static __always_inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
 {
        u32 val = readl(vpu->enc_base + reg);
 
@@ -397,27 +397,27 @@ static inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
        return val;
 }
 
-static inline void vdpu_write_relaxed(struct hantro_dev *vpu,
-                                     u32 val, u32 reg)
+static __always_inline void vdpu_write_relaxed(struct hantro_dev *vpu,
+                                              u32 val, u32 reg)
 {
        vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
        writel_relaxed(val, vpu->dec_base + reg);
 }
 
-static inline void vdpu_write(struct hantro_dev *vpu, u32 val, u32 reg)
+static __always_inline void vdpu_write(struct hantro_dev *vpu, u32 val, u32 reg)
 {
        vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
        writel(val, vpu->dec_base + reg);
 }
 
-static inline void hantro_write_addr(struct hantro_dev *vpu,
-                                    unsigned long offset,
-                                    dma_addr_t addr)
+static __always_inline void hantro_write_addr(struct hantro_dev *vpu,
+                                             unsigned long offset,
+                                             dma_addr_t addr)
 {
        vdpu_write(vpu, addr & 0xffffffff, offset);
 }
 
-static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
+static __always_inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
 {
        u32 val = readl(vpu->dec_base + reg);
 
@@ -425,9 +425,9 @@ static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
        return val;
 }
 
-static inline u32 vdpu_read_mask(struct hantro_dev *vpu,
-                                const struct hantro_reg *reg,
-                                u32 val)
+static __always_inline u32 vdpu_read_mask(struct hantro_dev *vpu,
+                                         const struct hantro_reg *reg,
+                                         u32 val)
 {
        u32 v;
 
@@ -437,18 +437,18 @@ static inline u32 vdpu_read_mask(struct hantro_dev *vpu,
        return v;
 }
 
-static inline void hantro_reg_write(struct hantro_dev *vpu,
-                                   const struct hantro_reg *reg,
-                                   u32 val)
+static __always_inline void hantro_reg_write(struct hantro_dev *vpu,
+                                            const struct hantro_reg *reg,
+                                            u32 val)
 {
-       vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
+       vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
 }
 
-static inline void hantro_reg_write_s(struct hantro_dev *vpu,
-                                     const struct hantro_reg *reg,
-                                     u32 val)
+static __always_inline void hantro_reg_write_relaxed(struct hantro_dev *vpu,
+                                                    const struct hantro_reg *reg,
+                                                    u32 val)
 {
-       vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
+       vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
 }
 
 void *hantro_get_ctrl(struct hantro_ctx *ctx, u32 id);
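
Besides forcing the accessors inline, the two hunks above swap semantics: hantro_reg_write() now uses an ordered writel(), and the relaxed variant has to be requested explicitly through the new _relaxed name. A hedged sketch of the intended usage pattern; the register offsets below are placeholders, not real Hantro registers:

static void example_start_block(struct hantro_dev *vpu)
{
	/* bulk configuration: ordering against other MMIO is not critical */
	vdpu_write_relaxed(vpu, 0x0, 0x10);
	vdpu_write_relaxed(vpu, 0x1, 0x14);

	/* final "go" write: ordered, so the configuration lands first */
	vdpu_write(vpu, 0x1, 0x04);
}
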
index c977d64..0224ff6 100644
                         val); \
 }
 
-#define HANTRO_PP_REG_WRITE_S(vpu, reg_name, val) \
+#define HANTRO_PP_REG_WRITE_RELAXED(vpu, reg_name, val) \
 { \
-       hantro_reg_write_s(vpu, \
-                          &hantro_g1_postproc_regs.reg_name, \
-                          val); \
+       hantro_reg_write_relaxed(vpu, \
+                                &hantro_g1_postproc_regs.reg_name, \
+                                val); \
 }
 
 #define VPU_PP_IN_YUYV                 0x0
@@ -72,7 +72,7 @@ static void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
        dma_addr_t dst_dma;
 
        /* Turn on pipeline mode. Must be done first. */
-       HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1);
+       HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x1);
 
        src_pp_fmt = VPU_PP_IN_NV12;
 
@@ -242,7 +242,7 @@ static void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
 {
        struct hantro_dev *vpu = ctx->dev;
 
-       HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
+       HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x0);
 }
 
 static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
index 5ac2a42..f4988f0 100644
@@ -45,7 +45,7 @@ static int uvc_control_add_xu_mapping(struct uvc_video_chain *chain,
        map->menu_names = NULL;
        map->menu_mapping = NULL;
 
-       map->menu_mask = BIT_MASK(xmap->menu_count);
+       map->menu_mask = GENMASK(xmap->menu_count - 1, 0);
 
        size = xmap->menu_count * sizeof(*map->menu_mapping);
        map->menu_mapping = kzalloc(size, GFP_KERNEL);
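
BIT_MASK(n) expands to a single bit, 1UL << (n % BITS_PER_LONG), whereas GENMASK(n - 1, 0) builds a mask of n consecutive low bits, which is what menu_mask needs here. A runnable userspace check with menu_count = 3; the macros are re-derived locally for illustration:

#include <stdio.h>

#define MY_BIT_MASK(n)		(1UL << ((n) % (8 * sizeof(long))))
#define MY_GENMASK(h, l)	((~0UL >> (8 * sizeof(long) - 1 - (h))) & (~0UL << (l)))

int main(void)
{
	unsigned int menu_count = 3;

	printf("BIT_MASK(3)   = 0x%lx\n", MY_BIT_MASK(menu_count));		/* 0x8: one bit */
	printf("GENMASK(2, 0) = 0x%lx\n", MY_GENMASK(menu_count - 1, 0));	/* 0x7: three bits */
	return 0;
}
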
index 4a750da..deb6e65 100644
@@ -755,6 +755,43 @@ const char *const tegra_mc_error_names[8] = {
        [6] = "SMMU translation error",
 };
 
+struct icc_node *tegra_mc_icc_xlate(struct of_phandle_args *spec, void *data)
+{
+       struct tegra_mc *mc = icc_provider_to_tegra_mc(data);
+       struct icc_node *node;
+
+       list_for_each_entry(node, &mc->provider.nodes, node_list) {
+               if (node->id == spec->args[0])
+                       return node;
+       }
+
+       /*
+        * If a client driver calls devm_of_icc_get() before the MC driver
+        * is probed, then return EPROBE_DEFER to the client driver.
+        */
+       return ERR_PTR(-EPROBE_DEFER);
+}
+
+static int tegra_mc_icc_get(struct icc_node *node, u32 *average, u32 *peak)
+{
+       *average = 0;
+       *peak = 0;
+
+       return 0;
+}
+
+static int tegra_mc_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+       return 0;
+}
+
+const struct tegra_mc_icc_ops tegra_mc_icc_ops = {
+       .xlate = tegra_mc_icc_xlate,
+       .aggregate = icc_std_aggregate,
+       .get_bw = tegra_mc_icc_get,
+       .set = tegra_mc_icc_set,
+};
+
 /*
  * Memory Controller (MC) has few Memory Clients that are issuing memory
  * bandwidth allocation requests to the MC interconnect provider. The MC
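
The new shared tegra_mc_icc_xlate() resolves a client's devicetree ID to an interconnect node and defers probing while the MC provider's nodes are not registered yet. A hedged consumer-side sketch of how that deferral typically surfaces; the probe function and the path name "write" are made-up examples, not part of this patch:

#include <linux/device.h>
#include <linux/interconnect.h>

static int example_client_probe(struct device *dev)
{
	struct icc_path *path;

	path = devm_of_icc_get(dev, "write");
	if (IS_ERR(path))
		/* the -EPROBE_DEFER returned by the xlate above ends up here */
		return dev_err_probe(dev, PTR_ERR(path), "no icc path\n");

	/* request no bandwidth initially; matches get_bw reporting 0/0 */
	return icc_set_bw(path, 0, 0);
}
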
index b2416ee..26035ac 100644
@@ -1355,6 +1355,7 @@ const struct tegra_mc_soc tegra194_mc_soc = {
                   MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
        .has_addr_hi_reg = true,
        .ops = &tegra186_mc_ops,
+       .icc_ops = &tegra_mc_icc_ops,
        .ch_intmask = 0x00000f00,
        .global_intstatus_channel_shift = 8,
 };
index 8e873a7..8fb83b3 100644
@@ -827,7 +827,7 @@ static int tegra234_mc_icc_set(struct icc_node *src, struct icc_node *dst)
                return 0;
 
        if (!mc->bwmgr_mrq_supported)
-               return -EINVAL;
+               return 0;
 
        if (!mc->bpmp) {
                dev_err(mc->dev, "BPMP reference NULL\n");
@@ -874,7 +874,7 @@ static int tegra234_mc_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
        struct tegra_mc *mc = icc_provider_to_tegra_mc(p);
 
        if (!mc->bwmgr_mrq_supported)
-               return -EINVAL;
+               return 0;
 
        if (node->id == TEGRA_ICC_MC_CPU_CLUSTER0 ||
            node->id == TEGRA_ICC_MC_CPU_CLUSTER1 ||
@@ -889,27 +889,6 @@ static int tegra234_mc_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
        return 0;
 }
 
-static struct icc_node*
-tegra234_mc_of_icc_xlate(struct of_phandle_args *spec, void *data)
-{
-       struct tegra_mc *mc = icc_provider_to_tegra_mc(data);
-       unsigned int cl_id = spec->args[0];
-       struct icc_node *node;
-
-       list_for_each_entry(node, &mc->provider.nodes, node_list) {
-               if (node->id != cl_id)
-                       continue;
-
-               return node;
-       }
-
-       /*
-        * If a client driver calls devm_of_icc_get() before the MC driver
-        * is probed, then return EPROBE_DEFER to the client driver.
-        */
-       return ERR_PTR(-EPROBE_DEFER);
-}
-
 static int tegra234_mc_icc_get_init_bw(struct icc_node *node, u32 *avg, u32 *peak)
 {
        *avg = 0;
@@ -919,7 +898,7 @@ static int tegra234_mc_icc_get_init_bw(struct icc_node *node, u32 *avg, u32 *peak)
 }
 
 static const struct tegra_mc_icc_ops tegra234_mc_icc_ops = {
-       .xlate = tegra234_mc_of_icc_xlate,
+       .xlate = tegra_mc_icc_xlate,
        .aggregate = tegra234_mc_icc_aggregate,
        .get_bw = tegra234_mc_icc_get_init_bw,
        .set = tegra234_mc_icc_set,
index d676cf6..3dae5e3 100644
@@ -195,7 +195,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
                }
        }
 
-       if (option->force_clkreq_0)
+       if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
                rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
        else
index cfebad5..f4ab094 100644
@@ -435,17 +435,10 @@ static void rts5228_init_from_cfg(struct rtsx_pcr *pcr)
                        option->ltr_enabled = false;
                }
        }
-
-       if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
-                               | PM_L1_1_EN | PM_L1_2_EN))
-               option->force_clkreq_0 = false;
-       else
-               option->force_clkreq_0 = true;
 }
 
 static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
 {
-       struct rtsx_cr_option *option = &pcr->option;
 
        rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
                        CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
@@ -476,17 +469,6 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
        else
                rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
 
-       /*
-        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
-        * to drive low, and we forcibly request clock.
-        */
-       if (option->force_clkreq_0)
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
-       else
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
        rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
 
        if (pcr->rtd3_en) {
index 91d240d..47ab72a 100644
@@ -327,12 +327,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
                }
        }
 
-
        /*
         * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
         * to drive low, and we forcibly request clock.
         */
-       if (option->force_clkreq_0)
+       if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
                rtsx_pci_write_register(pcr, PETXCFG,
                        FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
        else
index 9b42b20..79b18f6 100644
@@ -517,17 +517,10 @@ static void rts5260_init_from_cfg(struct rtsx_pcr *pcr)
                        option->ltr_enabled = false;
                }
        }
-
-       if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
-                               | PM_L1_1_EN | PM_L1_2_EN))
-               option->force_clkreq_0 = false;
-       else
-               option->force_clkreq_0 = true;
 }
 
 static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
 {
-       struct rtsx_cr_option *option = &pcr->option;
 
        /* Set mcu_cnt to 7 to ensure data can be sampled properly */
        rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07);
@@ -546,17 +539,6 @@ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
 
        rts5260_init_hw(pcr);
 
-       /*
-        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
-        * to drive low, and we forcibly request clock.
-        */
-       if (option->force_clkreq_0)
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
-       else
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
        rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
 
        return 0;
index b1e7603..94af6bf 100644
@@ -498,17 +498,10 @@ static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
                        option->ltr_enabled = false;
                }
        }
-
-       if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
-                               | PM_L1_1_EN | PM_L1_2_EN))
-               option->force_clkreq_0 = false;
-       else
-               option->force_clkreq_0 = true;
 }
 
 static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
 {
-       struct rtsx_cr_option *option = &pcr->option;
        u32 val;
 
        rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
@@ -554,17 +547,6 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
        else
                rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
 
-       /*
-        * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
-        * to drive low, and we forcibly request clock.
-        */
-       if (option->force_clkreq_0)
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
-       else
-               rtsx_pci_write_register(pcr, PETXCFG,
-                                FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
        rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
 
        if (pcr->rtd3_en) {
index 32b7783..a3f4b52 100644
@@ -1326,8 +1326,11 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
                        return err;
        }
 
-       if (pcr->aspm_mode == ASPM_MODE_REG)
+       if (pcr->aspm_mode == ASPM_MODE_REG) {
                rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
+               rtsx_pci_write_register(pcr, PETXCFG,
+                               FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+       }
 
        /* No CD interrupt if probing driver with card inserted.
         * So we need to initialize pcr->card_exist here.
index 35fec1b..5867af9 100644
@@ -139,7 +139,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
        if (ret) {
                ret->i_ino = get_next_ino();
                ret->i_mode = mode;
-               ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+               ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
        }
        return ret;
 }
index cbaf6d3..2101eb1 100644
@@ -1124,7 +1124,7 @@ static ssize_t ibmvmc_write(struct file *file, const char *buffer,
                goto out;
 
        inode = file_inode(file);
-       inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n",
index 3c95600..c66cc05 100644
@@ -273,8 +273,8 @@ static void lkdtm_HUNG_TASK(void)
        schedule();
 }
 
-volatile unsigned int huge = INT_MAX - 2;
-volatile unsigned int ignored;
+static volatile unsigned int huge = INT_MAX - 2;
+static volatile unsigned int ignored;
 
 static void lkdtm_OVERFLOW_SIGNED(void)
 {
@@ -305,7 +305,7 @@ static void lkdtm_OVERFLOW_UNSIGNED(void)
        ignored = value;
 }
 
-/* Intentionally using old-style flex array definition of 1 byte. */
+/* Intentionally using unannotated flex array definition. */
 struct array_bounds_flex_array {
        int one;
        int two;
@@ -357,6 +357,46 @@ static void lkdtm_ARRAY_BOUNDS(void)
                pr_expected_config(CONFIG_UBSAN_BOUNDS);
 }
 
+struct lkdtm_annotated {
+       unsigned long flags;
+       int count;
+       int array[] __counted_by(count);
+};
+
+static volatile int fam_count = 4;
+
+static void lkdtm_FAM_BOUNDS(void)
+{
+       struct lkdtm_annotated *inst;
+
+       inst = kzalloc(struct_size(inst, array, fam_count + 1), GFP_KERNEL);
+       if (!inst) {
+               pr_err("FAIL: could not allocate test struct!\n");
+               return;
+       }
+
+       inst->count = fam_count;
+       pr_info("Array access within bounds ...\n");
+       inst->array[1] = fam_count;
+       ignored = inst->array[1];
+
+       pr_info("Array access beyond bounds ...\n");
+       inst->array[fam_count] = fam_count;
+       ignored = inst->array[fam_count];
+
+       kfree(inst);
+
+       pr_err("FAIL: survived access of invalid flexible array member index!\n");
+
+       if (!__has_attribute(__counted_by__))
+               pr_warn("This is expected since this %s was built a compiler supporting __counted_by\n",
+                       lkdtm_kernel_info);
+       else if (IS_ENABLED(CONFIG_UBSAN_BOUNDS))
+               pr_expected_config(CONFIG_UBSAN_TRAP);
+       else
+               pr_expected_config(CONFIG_UBSAN_BOUNDS);
+}
+
 static void lkdtm_CORRUPT_LIST_ADD(void)
 {
        /*
@@ -393,7 +433,7 @@ static void lkdtm_CORRUPT_LIST_ADD(void)
                pr_err("Overwrite did not happen, but no BUG?!\n");
        else {
                pr_err("list_add() corruption not detected!\n");
-               pr_expected_config(CONFIG_DEBUG_LIST);
+               pr_expected_config(CONFIG_LIST_HARDENED);
        }
 }
 
@@ -420,7 +460,7 @@ static void lkdtm_CORRUPT_LIST_DEL(void)
                pr_err("Overwrite did not happen, but no BUG?!\n");
        else {
                pr_err("list_del() corruption not detected!\n");
-               pr_expected_config(CONFIG_DEBUG_LIST);
+               pr_expected_config(CONFIG_LIST_HARDENED);
        }
 }
 
@@ -616,6 +656,7 @@ static struct crashtype crashtypes[] = {
        CRASHTYPE(OVERFLOW_SIGNED),
        CRASHTYPE(OVERFLOW_UNSIGNED),
        CRASHTYPE(ARRAY_BOUNDS),
+       CRASHTYPE(FAM_BOUNDS),
        CRASHTYPE(CORRUPT_LIST_ADD),
        CRASHTYPE(CORRUPT_LIST_DEL),
        CRASHTYPE(STACK_GUARD_PAGE_LEADING),
index 5757adf..6120973 100644
@@ -236,7 +236,7 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
                        }
                        if (!label)
                                block->label = devm_kasprintf(sram->dev, GFP_KERNEL,
-                                                             "%s", dev_name(sram->dev));
+                                                             "%s", of_node_full_name(child));
                        else
                                block->label = devm_kstrdup(sram->dev,
                                                            label, GFP_KERNEL);
index b488f70..05e2c15 100644
@@ -13,6 +13,8 @@
 
 #include <linux/mfd/tps6594.h>
 
+#define TPS6594_DEV_REV_1 0x08
+
 static irqreturn_t tps6594_esm_isr(int irq, void *dev_id)
 {
        struct platform_device *pdev = dev_id;
@@ -32,11 +34,26 @@ static int tps6594_esm_probe(struct platform_device *pdev)
 {
        struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent);
        struct device *dev = &pdev->dev;
+       unsigned int rev;
        int irq;
        int ret;
        int i;
 
-       for (i = 0 ; i < pdev->num_resources ; i++) {
+       /*
+        * Due to a bug in revision 1 of the PMIC, the GPIO3 pin meant for
+        * the SoC ESM function powers the load switch instead.
+        * As a consequence, ESM cannot be used on those PMICs.
+        * Check the version and return an error in case of revision 1.
+        */
+       ret = regmap_read(tps->regmap, TPS6594_REG_DEV_REV, &rev);
+       if (ret)
+               return dev_err_probe(dev, ret,
+                                    "Failed to read PMIC revision\n");
+       if (rev == TPS6594_DEV_REV_1)
+               return dev_err_probe(dev, -ENODEV,
+                             "ESM not supported for revision 1 PMIC\n");
+
+       for (i = 0; i < pdev->num_resources; i++) {
                irq = platform_get_irq_byname(pdev, pdev->resource[i].name);
                if (irq < 0)
                        return dev_err_probe(dev, irq, "Failed to get %s irq\n",
index f701efb..b6f4be2 100644
@@ -2097,14 +2097,14 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
        mmc_blk_urgent_bkops(mq, mqrq);
 }
 
-static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
+static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, enum mmc_issue_type issue_type)
 {
        unsigned long flags;
        bool put_card;
 
        spin_lock_irqsave(&mq->lock, flags);
 
-       mq->in_flight[mmc_issue_type(mq, req)] -= 1;
+       mq->in_flight[issue_type] -= 1;
 
        put_card = (mmc_tot_in_flight(mq) == 0);
 
@@ -2117,6 +2117,7 @@ static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
 static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
                                bool can_sleep)
 {
+       enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
        struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
        struct mmc_request *mrq = &mqrq->brq.mrq;
        struct mmc_host *host = mq->card->host;
@@ -2136,7 +2137,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
                        blk_mq_complete_request(req);
        }
 
-       mmc_blk_mq_dec_in_flight(mq, req);
+       mmc_blk_mq_dec_in_flight(mq, issue_type);
 }
 
 void mmc_blk_mq_recovery(struct mmc_queue *mq)
index 2d002c8..d0d6ffc 100644
@@ -338,13 +338,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
                                return;
                        }
                        for (len = 0; len < remain && len < host->fifo_width;) {
-                               /* SCR data must be read in big endian. */
-                               if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
-                                       *sgp = ioread32be(host->base +
-                                                         REG_DATA_WINDOW);
-                               else
-                                       *sgp = ioread32(host->base +
-                                                       REG_DATA_WINDOW);
+                               *sgp = ioread32(host->base + REG_DATA_WINDOW);
                                sgp++;
                                len += 4;
                        }
index a202a69..3215063 100644
@@ -29,9 +29,16 @@ struct f_sdhost_priv {
        bool enable_cmd_dat_delay;
 };
 
+static void *sdhci_f_sdhost_priv(struct sdhci_host *host)
+{
+       struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+       return sdhci_pltfm_priv(pltfm_host);
+}
+
 static void sdhci_f_sdh30_soft_voltage_switch(struct sdhci_host *host)
 {
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
        u32 ctrl = 0;
 
        usleep_range(2500, 3000);
@@ -64,7 +71,7 @@ static unsigned int sdhci_f_sdh30_get_min_clock(struct sdhci_host *host)
 
 static void sdhci_f_sdh30_reset(struct sdhci_host *host, u8 mask)
 {
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
        u32 ctl;
 
        if (sdhci_readw(host, SDHCI_CLOCK_CONTROL) == 0)
@@ -95,30 +102,32 @@ static const struct sdhci_ops sdhci_f_sdh30_ops = {
        .set_uhs_signaling = sdhci_set_uhs_signaling,
 };
 
+static const struct sdhci_pltfm_data sdhci_f_sdh30_pltfm_data = {
+       .ops = &sdhci_f_sdh30_ops,
+       .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC
+               | SDHCI_QUIRK_INVERTED_WRITE_PROTECT,
+       .quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE
+               |  SDHCI_QUIRK2_TUNING_WORK_AROUND,
+};
+
 static int sdhci_f_sdh30_probe(struct platform_device *pdev)
 {
        struct sdhci_host *host;
        struct device *dev = &pdev->dev;
-       int irq, ctrl = 0, ret = 0;
+       int ctrl = 0, ret = 0;
        struct f_sdhost_priv *priv;
+       struct sdhci_pltfm_host *pltfm_host;
        u32 reg = 0;
 
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
-               return irq;
-
-       host = sdhci_alloc_host(dev, sizeof(struct f_sdhost_priv));
+       host = sdhci_pltfm_init(pdev, &sdhci_f_sdh30_pltfm_data,
+                               sizeof(struct f_sdhost_priv));
        if (IS_ERR(host))
                return PTR_ERR(host);
 
-       priv = sdhci_priv(host);
+       pltfm_host = sdhci_priv(host);
+       priv = sdhci_pltfm_priv(pltfm_host);
        priv->dev = dev;
 
-       host->quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
-                      SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
-       host->quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE |
-                       SDHCI_QUIRK2_TUNING_WORK_AROUND;
-
        priv->enable_cmd_dat_delay = device_property_read_bool(dev,
                                                "fujitsu,cmd-dat-delay-select");
 
@@ -126,18 +135,6 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
        if (ret)
                goto err;
 
-       platform_set_drvdata(pdev, host);
-
-       host->hw_name = "f_sdh30";
-       host->ops = &sdhci_f_sdh30_ops;
-       host->irq = irq;
-
-       host->ioaddr = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(host->ioaddr)) {
-               ret = PTR_ERR(host->ioaddr);
-               goto err;
-       }
-
        if (dev_of_node(dev)) {
                sdhci_get_of_property(pdev);
 
@@ -204,24 +201,24 @@ err_rst:
 err_clk:
        clk_disable_unprepare(priv->clk_iface);
 err:
-       sdhci_free_host(host);
+       sdhci_pltfm_free(pdev);
+
        return ret;
 }
 
 static int sdhci_f_sdh30_remove(struct platform_device *pdev)
 {
        struct sdhci_host *host = platform_get_drvdata(pdev);
-       struct f_sdhost_priv *priv = sdhci_priv(host);
+       struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
+       struct clk *clk_iface = priv->clk_iface;
+       struct reset_control *rst = priv->rst;
+       struct clk *clk = priv->clk;
 
-       sdhci_remove_host(host, readl(host->ioaddr + SDHCI_INT_STATUS) ==
-                         0xffffffff);
-
-       reset_control_assert(priv->rst);
-       clk_disable_unprepare(priv->clk);
-       clk_disable_unprepare(priv->clk_iface);
+       sdhci_pltfm_unregister(pdev);
 
-       sdhci_free_host(host);
-       platform_set_drvdata(pdev, NULL);
+       reset_control_assert(rst);
+       clk_disable_unprepare(clk);
+       clk_disable_unprepare(clk_iface);
 
        return 0;
 }
index db5e0dc..2bdebeb 100644
@@ -863,11 +863,9 @@ static int spmmc_drv_probe(struct platform_device *pdev)
        struct spmmc_host *host;
        int ret = 0;
 
-       mmc = mmc_alloc_host(sizeof(*host), &pdev->dev);
-       if (!mmc) {
-               ret = -ENOMEM;
-               goto probe_free_host;
-       }
+       mmc = devm_mmc_alloc_host(&pdev->dev, sizeof(struct spmmc_host));
+       if (!mmc)
+               return -ENOMEM;
 
        host = mmc_priv(mmc);
        host->mmc = mmc;
@@ -902,7 +900,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
 
        ret = mmc_of_parse(mmc);
        if (ret)
-               goto probe_free_host;
+               goto clk_disable;
 
        mmc->ops = &spmmc_ops;
        mmc->f_min = SPMMC_MIN_CLK;
@@ -911,7 +909,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
 
        ret = mmc_regulator_get_supply(mmc);
        if (ret)
-               goto probe_free_host;
+               goto clk_disable;
 
        if (!mmc->ocr_avail)
                mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
@@ -927,14 +925,17 @@ static int spmmc_drv_probe(struct platform_device *pdev)
        host->tuning_info.enable_tuning = 1;
        pm_runtime_set_active(&pdev->dev);
        pm_runtime_enable(&pdev->dev);
-       mmc_add_host(mmc);
+       ret = mmc_add_host(mmc);
+       if (ret)
+               goto pm_disable;
 
-       return ret;
+       return 0;
 
-probe_free_host:
-       if (mmc)
-               mmc_free_host(mmc);
+pm_disable:
+       pm_runtime_disable(&pdev->dev);
 
+clk_disable:
+       clk_disable_unprepare(host->clk);
        return ret;
 }
 
@@ -948,7 +949,6 @@ static int spmmc_drv_remove(struct platform_device *dev)
        pm_runtime_put_noidle(&dev->dev);
        pm_runtime_disable(&dev->dev);
        platform_set_drvdata(dev, NULL);
-       mmc_free_host(host->mmc);
 
        return 0;
 }
index 521af92..bf2a92f 100644
@@ -1705,8 +1705,6 @@ static int wbsd_init(struct device *dev, int base, int irq, int dma,
 
                wbsd_release_resources(host);
                wbsd_free_mmc(dev);
-
-               mmc_free_host(mmc);
                return ret;
        }
 
index 0864261..7366e85 100644
@@ -135,7 +135,7 @@ static int fun_exec_op(struct nand_chip *chip, const struct nand_operation *op,
        unsigned int i;
        int ret;
 
-       if (op->cs > NAND_MAX_CHIPS)
+       if (op->cs >= NAND_MAX_CHIPS)
                return -EINVAL;
 
        if (check_only)
index d3faf80..b10011d 100644
@@ -1278,7 +1278,6 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
        struct meson_nfc *nfc = nand_get_controller_data(nand);
        struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
        struct mtd_info *mtd = nand_to_mtd(nand);
-       int nsectors = mtd->writesize / 1024;
        int raw_writesize;
        int ret;
 
@@ -1304,7 +1303,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
        nand->options |= NAND_NO_SUBPAGE_WRITE;
 
        ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps,
-                                  mtd->oobsize - 2 * nsectors);
+                                  mtd->oobsize - 2);
        if (ret) {
                dev_err(nfc->dev, "failed to ECC init\n");
                return -EINVAL;
index 6e1eac6..4a97d4a 100644
@@ -177,17 +177,17 @@ static void elm_load_syndrome(struct elm_info *info,
                        switch (info->bch_type) {
                        case BCH8_ECC:
                                /* syndrome fragment 0 = ecc[9-12B] */
-                               val = cpu_to_be32(*(u32 *) &ecc[9]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[9]);
                                elm_write_reg(info, offset, val);
 
                                /* syndrome fragment 1 = ecc[5-8B] */
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[5]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[5]);
                                elm_write_reg(info, offset, val);
 
                                /* syndrome fragment 2 = ecc[1-4B] */
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[1]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[1]);
                                elm_write_reg(info, offset, val);
 
                                /* syndrome fragment 3 = ecc[0B] */
@@ -197,35 +197,35 @@ static void elm_load_syndrome(struct elm_info *info,
                                break;
                        case BCH4_ECC:
                                /* syndrome fragment 0 = ecc[20-52b] bits */
-                               val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) |
+                               val = ((__force u32)cpu_to_be32(*(u32 *)&ecc[3]) >> 4) |
                                        ((ecc[2] & 0xf) << 28);
                                elm_write_reg(info, offset, val);
 
                                /* syndrome fragment 1 = ecc[0-20b] bits */
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 12;
                                elm_write_reg(info, offset, val);
                                break;
                        case BCH16_ECC:
-                               val = cpu_to_be32(*(u32 *) &ecc[22]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[22]);
                                elm_write_reg(info, offset, val);
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[18]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[18]);
                                elm_write_reg(info, offset, val);
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[14]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[14]);
                                elm_write_reg(info, offset, val);
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[10]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[10]);
                                elm_write_reg(info, offset, val);
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[6]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[6]);
                                elm_write_reg(info, offset, val);
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[2]);
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[2]);
                                elm_write_reg(info, offset, val);
                                offset += 4;
-                               val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
+                               val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 16;
                                elm_write_reg(info, offset, val);
                                break;
                        default:
index 2312e27..5a04680 100644
@@ -562,9 +562,10 @@ static int rk_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf,
                 *    BBM  OOB1 OOB2 OOB3 |......|  PA0  PA1  PA2  PA3
                 *
                 * The rk_nfc_ooblayout_free() function already has reserved
-                * these 4 bytes with:
+                * these 4 bytes together with 2 bytes for BBM
+                * by reducing its length:
                 *
-                * oob_region->offset = NFC_SYS_DATA_SIZE + 2;
+                * oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
                 */
                if (!i)
                        memcpy(rk_nfc_oob_ptr(chip, i),
@@ -597,7 +598,7 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
        int pages_per_blk = mtd->erasesize / mtd->writesize;
        int ret = 0, i, boot_rom_mode = 0;
        dma_addr_t dma_data, dma_oob;
-       u32 reg;
+       u32 tmp;
        u8 *oob;
 
        nand_prog_page_begin_op(chip, page, 0, NULL, 0);
@@ -624,6 +625,13 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
         *
         *   0xFF 0xFF 0xFF 0xFF | BBM OOB1 OOB2 OOB3 | ...
         *
+        * The code here just swaps the first 4 bytes with the last
+        * 4 bytes without losing any data.
+        *
+        * The chip->oob_poi data layout:
+        *
+        *    BBM  OOB1 OOB2 OOB3 |......|  PA0  PA1  PA2  PA3
+        *
         * Configure the ECC algorithm supported by the boot ROM.
         */
        if ((page < (pages_per_blk * rknand->boot_blks)) &&
@@ -634,21 +642,17 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
        }
 
        for (i = 0; i < ecc->steps; i++) {
-               if (!i) {
-                       reg = 0xFFFFFFFF;
-               } else {
+               if (!i)
+                       oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
+               else
                        oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
-                       reg = oob[0] | oob[1] << 8 | oob[2] << 16 |
-                             oob[3] << 24;
-               }
 
-               if (!i && boot_rom_mode)
-                       reg = (page & (pages_per_blk - 1)) * 4;
+               tmp = oob[0] | oob[1] << 8 | oob[2] << 16 | oob[3] << 24;
 
                if (nfc->cfg->type == NFC_V9)
-                       nfc->oob_buf[i] = reg;
+                       nfc->oob_buf[i] = tmp;
                else
-                       nfc->oob_buf[i * (oob_step / 4)] = reg;
+                       nfc->oob_buf[i * (oob_step / 4)] = tmp;
        }
 
        dma_data = dma_map_single(nfc->dev, (void *)nfc->page_buf,
@@ -811,12 +815,17 @@ static int rk_nfc_read_page_hwecc(struct nand_chip *chip, u8 *buf, int oob_on,
                goto timeout_err;
        }
 
-       for (i = 1; i < ecc->steps; i++) {
-               oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+       for (i = 0; i < ecc->steps; i++) {
+               if (!i)
+                       oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
+               else
+                       oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+
                if (nfc->cfg->type == NFC_V9)
                        tmp = nfc->oob_buf[i];
                else
                        tmp = nfc->oob_buf[i * (oob_step / 4)];
+
                *oob++ = (u8)tmp;
                *oob++ = (u8)(tmp >> 8);
                *oob++ = (u8)(tmp >> 16);
@@ -933,12 +942,8 @@ static int rk_nfc_ooblayout_free(struct mtd_info *mtd, int section,
        if (section)
                return -ERANGE;
 
-       /*
-        * The beginning of the OOB area stores the reserved data for the NFC,
-        * the size of the reserved data is NFC_SYS_DATA_SIZE bytes.
-        */
        oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
-       oob_region->offset = NFC_SYS_DATA_SIZE + 2;
+       oob_region->offset = 2;
 
        return 0;
 }
index 7380b1e..a80427c 100644
@@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
 {
        struct nand_device *nand = spinand_to_nand(spinand);
        u8 mbf = 0;
-       struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
+       struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
 
        switch (status & STATUS_ECC_MASK) {
        case STATUS_ECC_NO_BITFLIPS:
@@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
                if (spi_mem_exec_op(spinand->spimem, &op))
                        return nanddev_get_ecc_conf(nand)->strength;
 
-               mbf >>= 4;
+               mbf = *(spinand->scratchbuf) >> 4;
 
                if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
                        return nanddev_get_ecc_conf(nand)->strength;
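
This fix, and the identical one in the next file, route the GET_FEATURE read through spinand->scratchbuf: buffers passed to spi_mem_exec_op() may be mapped for DMA, so they must live on the heap rather than on the stack like the old &mbf. A hedged sketch of the resulting pattern; the helper name is illustrative:

static int read_mbf_dma_safe(struct spinand_device *spinand, u8 *out)
{
	/* u8 mbf; ...GET_FEATURE_OP(0x30, &mbf) would NOT be DMA-safe */
	struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
	int ret;

	ret = spi_mem_exec_op(spinand->spimem, &op);
	if (ret)
		return ret;

	*out = *spinand->scratchbuf;	/* copy out after the transfer */
	return 0;
}
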
index 3ad58cd..f507e37 100644
@@ -108,7 +108,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand,
 {
        struct nand_device *nand = spinand_to_nand(spinand);
        u8 mbf = 0;
-       struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
+       struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
 
        switch (status & STATUS_ECC_MASK) {
        case STATUS_ECC_NO_BITFLIPS:
@@ -126,7 +126,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand,
                if (spi_mem_exec_op(spinand->spimem, &op))
                        return nanddev_get_ecc_conf(nand)->strength;
 
-               mbf >>= 4;
+               mbf = *(spinand->scratchbuf) >> 4;
 
                if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
                        return nanddev_get_ecc_conf(nand)->strength;
index 36876aa..15f9a80 100644
@@ -361,7 +361,7 @@ static int cypress_nor_determine_addr_mode_by_sr1(struct spi_nor *nor,
  */
 static int cypress_nor_set_addr_mode_nbytes(struct spi_nor *nor)
 {
-       struct spi_mem_op op = {};
+       struct spi_mem_op op;
        u8 addr_mode;
        int ret;
 
@@ -492,7 +492,7 @@ s25fs256t_post_bfpt_fixup(struct spi_nor *nor,
                          const struct sfdp_parameter_header *bfpt_header,
                          const struct sfdp_bfpt *bfpt)
 {
-       struct spi_mem_op op = {};
+       struct spi_mem_op op;
        int ret;
 
        ret = cypress_nor_set_addr_mode_nbytes(nor);
index b9dbad3..fc5da5d 100644
@@ -660,10 +660,10 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
                return NULL;
        arp = (struct arp_pkt *)skb_network_header(skb);
 
-       /* Don't modify or load balance ARPs that do not originate locally
-        * (e.g.,arrive via a bridge).
+       /* Don't modify or load balance ARPs that do not originate
+        * from the bond itself or a VLAN directly above the bond.
         */
-       if (!bond_slave_has_mac_rx(bond, arp->mac_src))
+       if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
                return NULL;
 
        dev = ip_dev_find(dev_net(bond->dev), arp->ip_src);
index 7a0f253..447b06e 100644
@@ -1508,6 +1508,11 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
 
        memcpy(bond_dev->broadcast, slave_dev->broadcast,
                slave_dev->addr_len);
+
+       if (slave_dev->flags & IFF_POINTOPOINT) {
+               bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+               bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
+       }
 }
 
 /* On bonding slaves other than the currently active slave, suppress
@@ -5896,7 +5901,9 @@ void bond_setup(struct net_device *bond_dev)
 
        bond_dev->hw_features = BOND_VLAN_FEATURES |
                                NETIF_F_HW_VLAN_CTAG_RX |
-                               NETIF_F_HW_VLAN_CTAG_FILTER;
+                               NETIF_F_HW_VLAN_CTAG_FILTER |
+                               NETIF_F_HW_VLAN_STAG_RX |
+                               NETIF_F_HW_VLAN_STAG_FILTER;
 
        bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
        bond_dev->features |= bond_dev->hw_features;
index 68df6d4..eebf967 100644
@@ -227,6 +227,8 @@ static int
 __mcp251xfd_chip_set_mode(const struct mcp251xfd_priv *priv,
                          const u8 mode_req, bool nowait)
 {
+       const struct can_bittiming *bt = &priv->can.bittiming;
+       unsigned long timeout_us = MCP251XFD_POLL_TIMEOUT_US;
        u32 con = 0, con_reqop, osc = 0;
        u8 mode;
        int err;
@@ -246,12 +248,16 @@ __mcp251xfd_chip_set_mode(const struct mcp251xfd_priv *priv,
        if (mode_req == MCP251XFD_REG_CON_MODE_SLEEP || nowait)
                return 0;
 
+       if (bt->bitrate)
+               timeout_us = max_t(unsigned long, timeout_us,
+                                  MCP251XFD_FRAME_LEN_MAX_BITS * USEC_PER_SEC /
+                                  bt->bitrate);
+
        err = regmap_read_poll_timeout(priv->map_reg, MCP251XFD_REG_CON, con,
                                       !mcp251xfd_reg_invalid(con) &&
                                       FIELD_GET(MCP251XFD_REG_CON_OPMOD_MASK,
                                                 con) == mode_req,
-                                      MCP251XFD_POLL_SLEEP_US,
-                                      MCP251XFD_POLL_TIMEOUT_US);
+                                      MCP251XFD_POLL_SLEEP_US, timeout_us);
        if (err != -ETIMEDOUT && err != -EBADMSG)
                return err;
 
index 7024ff0..24510b3 100644
@@ -387,6 +387,7 @@ static_assert(MCP251XFD_TIMESTAMP_WORK_DELAY_SEC <
 #define MCP251XFD_OSC_STAB_TIMEOUT_US (10 * MCP251XFD_OSC_STAB_SLEEP_US)
 #define MCP251XFD_POLL_SLEEP_US (10)
 #define MCP251XFD_POLL_TIMEOUT_US (USEC_PER_MSEC)
+#define MCP251XFD_FRAME_LEN_MAX_BITS (736)
 
 /* Misc */
 #define MCP251XFD_NAPI_WEIGHT 32
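
MCP251XFD_FRAME_LEN_MAX_BITS (736) is the worst-case on-wire length of a single CAN FD frame; the hunk above stretches the mode-change poll timeout so it always covers at least one full frame at the configured bitrate. A standalone arithmetic check (not driver code), assuming a 125 kbit/s arbitration bitrate:

#include <stdio.h>

#define USEC_PER_SEC		1000000UL
#define POLL_TIMEOUT_US		1000UL	/* MCP251XFD_POLL_TIMEOUT_US */
#define FRAME_LEN_MAX_BITS	736UL

int main(void)
{
	unsigned long bitrate = 125000;	/* bit/s, example value */
	unsigned long frame_us = FRAME_LEN_MAX_BITS * USEC_PER_SEC / bitrate;
	unsigned long timeout_us = frame_us > POLL_TIMEOUT_US ? frame_us : POLL_TIMEOUT_US;

	/* prints frame=5888 us, timeout=5888 us */
	printf("frame=%lu us, timeout=%lu us\n", frame_us, timeout_us);
	return 0;
}
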
index d476c28..bd9eb06 100644
@@ -303,12 +303,6 @@ struct gs_can {
        struct can_bittiming_const bt_const, data_bt_const;
        unsigned int channel;   /* channel number */
 
-       /* time counter for hardware timestamps */
-       struct cyclecounter cc;
-       struct timecounter tc;
-       spinlock_t tc_lock; /* spinlock to guard access tc->cycle_last */
-       struct delayed_work timestamp;
-
        u32 feature;
        unsigned int hf_size_tx;
 
@@ -325,6 +319,13 @@ struct gs_usb {
        struct gs_can *canch[GS_MAX_INTF];
        struct usb_anchor rx_submitted;
        struct usb_device *udev;
+
+       /* time counter for hardware timestamps */
+       struct cyclecounter cc;
+       struct timecounter tc;
+       spinlock_t tc_lock; /* spinlock to guard access tc->cycle_last */
+       struct delayed_work timestamp;
+
        unsigned int hf_size_rx;
        u8 active_channels;
 };
@@ -388,15 +389,15 @@ static int gs_cmd_reset(struct gs_can *dev)
                                    GFP_KERNEL);
 }
 
-static inline int gs_usb_get_timestamp(const struct gs_can *dev,
+static inline int gs_usb_get_timestamp(const struct gs_usb *parent,
                                       u32 *timestamp_p)
 {
        __le32 timestamp;
        int rc;
 
-       rc = usb_control_msg_recv(dev->udev, 0, GS_USB_BREQ_TIMESTAMP,
+       rc = usb_control_msg_recv(parent->udev, 0, GS_USB_BREQ_TIMESTAMP,
                                  USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
-                                 dev->channel, 0,
+                                 0, 0,
                                  &timestamp, sizeof(timestamp),
                                  USB_CTRL_GET_TIMEOUT,
                                  GFP_KERNEL);
@@ -410,20 +411,20 @@ static inline int gs_usb_get_timestamp(const struct gs_can *dev,
 
 static u64 gs_usb_timestamp_read(const struct cyclecounter *cc) __must_hold(&dev->tc_lock)
 {
-       struct gs_can *dev = container_of(cc, struct gs_can, cc);
+       struct gs_usb *parent = container_of(cc, struct gs_usb, cc);
        u32 timestamp = 0;
        int err;
 
-       lockdep_assert_held(&dev->tc_lock);
+       lockdep_assert_held(&parent->tc_lock);
 
        /* drop lock for synchronous USB transfer */
-       spin_unlock_bh(&dev->tc_lock);
-       err = gs_usb_get_timestamp(dev, &timestamp);
-       spin_lock_bh(&dev->tc_lock);
+       spin_unlock_bh(&parent->tc_lock);
+       err = gs_usb_get_timestamp(parent, &timestamp);
+       spin_lock_bh(&parent->tc_lock);
        if (err)
-               netdev_err(dev->netdev,
-                          "Error %d while reading timestamp. HW timestamps may be inaccurate.",
-                          err);
+               dev_err(&parent->udev->dev,
+                       "Error %d while reading timestamp. HW timestamps may be inaccurate.",
+                       err);
 
        return timestamp;
 }
@@ -431,14 +432,14 @@ static u64 gs_usb_timestamp_read(const struct cyclecounter *cc) __must_hold(&dev->tc_lock)
 static void gs_usb_timestamp_work(struct work_struct *work)
 {
        struct delayed_work *delayed_work = to_delayed_work(work);
-       struct gs_can *dev;
+       struct gs_usb *parent;
 
-       dev = container_of(delayed_work, struct gs_can, timestamp);
-       spin_lock_bh(&dev->tc_lock);
-       timecounter_read(&dev->tc);
-       spin_unlock_bh(&dev->tc_lock);
+       parent = container_of(delayed_work, struct gs_usb, timestamp);
+       spin_lock_bh(&parent->tc_lock);
+       timecounter_read(&parent->tc);
+       spin_unlock_bh(&parent->tc_lock);
 
-       schedule_delayed_work(&dev->timestamp,
+       schedule_delayed_work(&parent->timestamp,
                              GS_USB_TIMESTAMP_WORK_DELAY_SEC * HZ);
 }
 
@@ -446,37 +447,38 @@ static void gs_usb_skb_set_timestamp(struct gs_can *dev,
                                     struct sk_buff *skb, u32 timestamp)
 {
        struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+       struct gs_usb *parent = dev->parent;
        u64 ns;
 
-       spin_lock_bh(&dev->tc_lock);
-       ns = timecounter_cyc2time(&dev->tc, timestamp);
-       spin_unlock_bh(&dev->tc_lock);
+       spin_lock_bh(&parent->tc_lock);
+       ns = timecounter_cyc2time(&parent->tc, timestamp);
+       spin_unlock_bh(&parent->tc_lock);
 
        hwtstamps->hwtstamp = ns_to_ktime(ns);
 }
 
-static void gs_usb_timestamp_init(struct gs_can *dev)
+static void gs_usb_timestamp_init(struct gs_usb *parent)
 {
-       struct cyclecounter *cc = &dev->cc;
+       struct cyclecounter *cc = &parent->cc;
 
        cc->read = gs_usb_timestamp_read;
        cc->mask = CYCLECOUNTER_MASK(32);
        cc->shift = 32 - bits_per(NSEC_PER_SEC / GS_USB_TIMESTAMP_TIMER_HZ);
        cc->mult = clocksource_hz2mult(GS_USB_TIMESTAMP_TIMER_HZ, cc->shift);
 
-       spin_lock_init(&dev->tc_lock);
-       spin_lock_bh(&dev->tc_lock);
-       timecounter_init(&dev->tc, &dev->cc, ktime_get_real_ns());
-       spin_unlock_bh(&dev->tc_lock);
+       spin_lock_init(&parent->tc_lock);
+       spin_lock_bh(&parent->tc_lock);
+       timecounter_init(&parent->tc, &parent->cc, ktime_get_real_ns());
+       spin_unlock_bh(&parent->tc_lock);
 
-       INIT_DELAYED_WORK(&dev->timestamp, gs_usb_timestamp_work);
-       schedule_delayed_work(&dev->timestamp,
+       INIT_DELAYED_WORK(&parent->timestamp, gs_usb_timestamp_work);
+       schedule_delayed_work(&parent->timestamp,
                              GS_USB_TIMESTAMP_WORK_DELAY_SEC * HZ);
 }
 
-static void gs_usb_timestamp_stop(struct gs_can *dev)
+static void gs_usb_timestamp_stop(struct gs_usb *parent)
 {
-       cancel_delayed_work_sync(&dev->timestamp);
+       cancel_delayed_work_sync(&parent->timestamp);
 }
 
 static void gs_update_state(struct gs_can *dev, struct can_frame *cf)
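
The cyclecounter that moved into struct gs_usb converts the shared 32-bit hardware counter to nanoseconds as ns = (cycles * mult) >> shift. A userspace sketch of that conversion, assuming a 1 MHz counter (GS_USB_TIMESTAMP_TIMER_HZ), so shift = 32 - bits_per(10^9 / 10^6) = 22:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t hz = 1000000;		/* assumed counter frequency */
	uint32_t shift = 22;		/* 32 - bits_per(1000) */
	uint32_t mult = (uint32_t)(((uint64_t)1000000000 << shift) / hz);

	uint64_t cycles = 5;		/* five 1 us ticks */
	uint64_t ns = (cycles * mult) >> shift;

	/* prints mult=4194304000 ns=5000 */
	printf("mult=%u ns=%llu\n", mult, (unsigned long long)ns);
	return 0;
}
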
@@ -560,6 +562,9 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
        if (!netif_device_present(netdev))
                return;
 
+       if (!netif_running(netdev))
+               goto resubmit_urb;
+
        if (hf->echo_id == -1) { /* normal rx */
                if (hf->flags & GS_CAN_FLAG_FD) {
                        skb = alloc_canfd_skb(dev->netdev, &cfd);
@@ -833,6 +838,7 @@ static int gs_can_open(struct net_device *netdev)
                .mode = cpu_to_le32(GS_CAN_MODE_START),
        };
        struct gs_host_frame *hf;
+       struct urb *urb = NULL;
        u32 ctrlmode;
        u32 flags = 0;
        int rc, i;
@@ -855,14 +861,18 @@ static int gs_can_open(struct net_device *netdev)
        }
 
        if (!parent->active_channels) {
+               if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
+                       gs_usb_timestamp_init(parent);
+
                for (i = 0; i < GS_MAX_RX_URBS; i++) {
-                       struct urb *urb;
                        u8 *buf;
 
                        /* alloc rx urb */
                        urb = usb_alloc_urb(0, GFP_KERNEL);
-                       if (!urb)
-                               return -ENOMEM;
+                       if (!urb) {
+                               rc = -ENOMEM;
+                               goto out_usb_kill_anchored_urbs;
+                       }
 
                        /* alloc rx buffer */
                        buf = kmalloc(dev->parent->hf_size_rx,
@@ -870,8 +880,8 @@ static int gs_can_open(struct net_device *netdev)
                        if (!buf) {
                                netdev_err(netdev,
                                           "No memory left for USB buffer\n");
-                               usb_free_urb(urb);
-                               return -ENOMEM;
+                               rc = -ENOMEM;
+                               goto out_usb_free_urb;
                        }
 
                        /* fill, anchor, and submit rx urb */
@@ -894,9 +904,7 @@ static int gs_can_open(struct net_device *netdev)
                                netdev_err(netdev,
                                           "usb_submit failed (err=%d)\n", rc);
 
-                               usb_unanchor_urb(urb);
-                               usb_free_urb(urb);
-                               break;
+                               goto out_usb_unanchor_urb;
                        }
 
                        /* Drop reference,
@@ -926,13 +934,9 @@ static int gs_can_open(struct net_device *netdev)
                flags |= GS_CAN_MODE_FD;
 
        /* if hardware supports timestamps, enable it */
-       if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) {
+       if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
                flags |= GS_CAN_MODE_HW_TIMESTAMP;
 
-               /* start polling timestamp */
-               gs_usb_timestamp_init(dev);
-       }
-
        /* finally start device */
        dev->can.state = CAN_STATE_ERROR_ACTIVE;
        dm.flags = cpu_to_le32(flags);
@@ -942,10 +946,9 @@ static int gs_can_open(struct net_device *netdev)
                                  GFP_KERNEL);
        if (rc) {
                netdev_err(netdev, "Couldn't start device (err=%d)\n", rc);
-               if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
-                       gs_usb_timestamp_stop(dev);
                dev->can.state = CAN_STATE_STOPPED;
-               return rc;
+
+               goto out_usb_kill_anchored_urbs;
        }
 
        parent->active_channels++;
@@ -953,6 +956,22 @@ static int gs_can_open(struct net_device *netdev)
                netif_start_queue(netdev);
 
        return 0;
+
+out_usb_unanchor_urb:
+       usb_unanchor_urb(urb);
+out_usb_free_urb:
+       usb_free_urb(urb);
+out_usb_kill_anchored_urbs:
+       if (!parent->active_channels) {
+               usb_kill_anchored_urbs(&dev->tx_submitted);
+
+               if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
+                       gs_usb_timestamp_stop(parent);
+       }
+
+       close_candev(netdev);
+
+       return rc;
 }
 
 static int gs_usb_get_state(const struct net_device *netdev,
@@ -998,20 +1017,21 @@ static int gs_can_close(struct net_device *netdev)
 
        netif_stop_queue(netdev);
 
-       /* stop polling timestamp */
-       if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
-               gs_usb_timestamp_stop(dev);
-
        /* Stop polling */
        parent->active_channels--;
        if (!parent->active_channels) {
                usb_kill_anchored_urbs(&parent->rx_submitted);
+
+               if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
+                       gs_usb_timestamp_stop(parent);
        }
 
        /* Stop sending URBs */
        usb_kill_anchored_urbs(&dev->tx_submitted);
        atomic_set(&dev->active_tx_urbs, 0);
 
+       dev->can.state = CAN_STATE_STOPPED;
+
        /* reset the device */
        rc = gs_cmd_reset(dev);
        if (rc < 0)
index 4068d96..98c669a 100644
@@ -192,12 +192,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
 
                nla_peer = data[VXCAN_INFO_PEER];
                ifmp = nla_data(nla_peer);
-               err = rtnl_nla_parse_ifla(peer_tb,
-                                         nla_data(nla_peer) +
-                                         sizeof(struct ifinfomsg),
-                                         nla_len(nla_peer) -
-                                         sizeof(struct ifinfomsg),
-                                         NULL);
+               err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
                if (err < 0)
                        return err;
 
index cde253d..72374b0 100644
@@ -1436,7 +1436,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
        if (IS_ERR(priv->clk))
                return PTR_ERR(priv->clk);
 
-       clk_prepare_enable(priv->clk);
+       ret = clk_prepare_enable(priv->clk);
+       if (ret)
+               return ret;
 
        priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv");
        if (IS_ERR(priv->clk_mdiv)) {
@@ -1444,7 +1446,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
                goto out_clk;
        }
 
-       clk_prepare_enable(priv->clk_mdiv);
+       ret = clk_prepare_enable(priv->clk_mdiv);
+       if (ret)
+               goto out_clk;
 
        ret = bcm_sf2_sw_rst(priv);
        if (ret) {
index 84d5025..91aba47 100644
@@ -506,7 +506,13 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
                (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
                        shifts[STATIC_MAC_FWD_PORTS];
        alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
-       data_hi >>= 1;
+
+       /* On KSZ8795 family switches, the STATIC_MAC_TABLE_USE_FID and
+        * STATIC_MAC_TABLE_FID field definitions are off by one bit when
+        * reading the static MAC table compared to writing it.
+        */
+       if (ksz_is_ksz87xx(dev))
+               data_hi >>= 1;
        alu->is_static = true;
        alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
        alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
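The comment above describes a read/write asymmetry: on KSZ8795-family parts the USE_FID/FID fields come back one bit higher on static-MAC-table reads than the write-side mask definitions expect, so the raw word is shifted right once before the masks are applied. A small self-contained sketch of that normalization (bit positions illustrative, not taken from the datasheet):

#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1u << (n))
#define USE_FID	BIT(23)		/* write-side bit position */

static uint32_t normalize_read(uint32_t data_hi, int is_ksz87xx)
{
	/* reads place the field one bit higher, so shift it back */
	return is_ksz87xx ? data_hi >> 1 : data_hi;
}

int main(void)
{
	uint32_t raw = USE_FID << 1;	/* bit 24, as seen on a read */

	printf("use_fid=%d\n", !!(normalize_read(raw, 1) & USE_FID));
	return 0;
}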
index 813b91a..6c0623f 100644
@@ -331,13 +331,13 @@ static const u32 ksz8795_masks[] = {
        [STATIC_MAC_TABLE_VALID]        = BIT(21),
        [STATIC_MAC_TABLE_USE_FID]      = BIT(23),
        [STATIC_MAC_TABLE_FID]          = GENMASK(30, 24),
-       [STATIC_MAC_TABLE_OVERRIDE]     = BIT(26),
-       [STATIC_MAC_TABLE_FWD_PORTS]    = GENMASK(24, 20),
+       [STATIC_MAC_TABLE_OVERRIDE]     = BIT(22),
+       [STATIC_MAC_TABLE_FWD_PORTS]    = GENMASK(20, 16),
        [DYNAMIC_MAC_TABLE_ENTRIES_H]   = GENMASK(6, 0),
-       [DYNAMIC_MAC_TABLE_MAC_EMPTY]   = BIT(8),
+       [DYNAMIC_MAC_TABLE_MAC_EMPTY]   = BIT(7),
        [DYNAMIC_MAC_TABLE_NOT_READY]   = BIT(7),
        [DYNAMIC_MAC_TABLE_ENTRIES]     = GENMASK(31, 29),
-       [DYNAMIC_MAC_TABLE_FID]         = GENMASK(26, 20),
+       [DYNAMIC_MAC_TABLE_FID]         = GENMASK(22, 16),
        [DYNAMIC_MAC_TABLE_SRC_PORT]    = GENMASK(26, 24),
        [DYNAMIC_MAC_TABLE_TIMESTAMP]   = GENMASK(28, 27),
        [P_MII_TX_FLOW_CTRL]            = BIT(5),
@@ -635,10 +635,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
        regmap_reg_range(0x1030, 0x1030),
        regmap_reg_range(0x1100, 0x1115),
        regmap_reg_range(0x111a, 0x111f),
-       regmap_reg_range(0x1122, 0x1127),
-       regmap_reg_range(0x112a, 0x112b),
-       regmap_reg_range(0x1136, 0x1139),
-       regmap_reg_range(0x113e, 0x113f),
+       regmap_reg_range(0x1120, 0x112b),
+       regmap_reg_range(0x1134, 0x113b),
+       regmap_reg_range(0x113c, 0x113f),
        regmap_reg_range(0x1400, 0x1401),
        regmap_reg_range(0x1403, 0x1403),
        regmap_reg_range(0x1410, 0x1417),
@@ -669,10 +668,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
        regmap_reg_range(0x2030, 0x2030),
        regmap_reg_range(0x2100, 0x2115),
        regmap_reg_range(0x211a, 0x211f),
-       regmap_reg_range(0x2122, 0x2127),
-       regmap_reg_range(0x212a, 0x212b),
-       regmap_reg_range(0x2136, 0x2139),
-       regmap_reg_range(0x213e, 0x213f),
+       regmap_reg_range(0x2120, 0x212b),
+       regmap_reg_range(0x2134, 0x213b),
+       regmap_reg_range(0x213c, 0x213f),
        regmap_reg_range(0x2400, 0x2401),
        regmap_reg_range(0x2403, 0x2403),
        regmap_reg_range(0x2410, 0x2417),
@@ -703,10 +701,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
        regmap_reg_range(0x3030, 0x3030),
        regmap_reg_range(0x3100, 0x3115),
        regmap_reg_range(0x311a, 0x311f),
-       regmap_reg_range(0x3122, 0x3127),
-       regmap_reg_range(0x312a, 0x312b),
-       regmap_reg_range(0x3136, 0x3139),
-       regmap_reg_range(0x313e, 0x313f),
+       regmap_reg_range(0x3120, 0x312b),
+       regmap_reg_range(0x3134, 0x313b),
+       regmap_reg_range(0x313c, 0x313f),
        regmap_reg_range(0x3400, 0x3401),
        regmap_reg_range(0x3403, 0x3403),
        regmap_reg_range(0x3410, 0x3417),
@@ -737,10 +734,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
        regmap_reg_range(0x4030, 0x4030),
        regmap_reg_range(0x4100, 0x4115),
        regmap_reg_range(0x411a, 0x411f),
-       regmap_reg_range(0x4122, 0x4127),
-       regmap_reg_range(0x412a, 0x412b),
-       regmap_reg_range(0x4136, 0x4139),
-       regmap_reg_range(0x413e, 0x413f),
+       regmap_reg_range(0x4120, 0x412b),
+       regmap_reg_range(0x4134, 0x413b),
+       regmap_reg_range(0x413c, 0x413f),
        regmap_reg_range(0x4400, 0x4401),
        regmap_reg_range(0x4403, 0x4403),
        regmap_reg_range(0x4410, 0x4417),
@@ -771,10 +767,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
        regmap_reg_range(0x5030, 0x5030),
        regmap_reg_range(0x5100, 0x5115),
        regmap_reg_range(0x511a, 0x511f),
-       regmap_reg_range(0x5122, 0x5127),
-       regmap_reg_range(0x512a, 0x512b),
-       regmap_reg_range(0x5136, 0x5139),
-       regmap_reg_range(0x513e, 0x513f),
+       regmap_reg_range(0x5120, 0x512b),
+       regmap_reg_range(0x5134, 0x513b),
+       regmap_reg_range(0x513c, 0x513f),
        regmap_reg_range(0x5400, 0x5401),
        regmap_reg_range(0x5403, 0x5403),
        regmap_reg_range(0x5410, 0x5417),
index 28444e5..a4de588 100644
@@ -601,6 +601,13 @@ static inline void ksz_regmap_unlock(void *__mtx)
        mutex_unlock(mtx);
 }
 
+static inline bool ksz_is_ksz87xx(struct ksz_device *dev)
+{
+       return dev->chip_id == KSZ8795_CHIP_ID ||
+              dev->chip_id == KSZ8794_CHIP_ID ||
+              dev->chip_id == KSZ8765_CHIP_ID;
+}
+
 static inline bool ksz_is_ksz88x3(struct ksz_device *dev)
 {
        return dev->chip_id == KSZ8830_CHIP_ID;
index 38b3c6d..b8bb9f3 100644
@@ -1006,6 +1006,10 @@ mt753x_trap_frames(struct mt7530_priv *priv)
        mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
                   MT753X_BPDU_CPU_ONLY);
 
+       /* Trap 802.1X PAE frames to the CPU port(s) */
+       mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK,
+                  MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY));
+
        /* Trap LLDP frames with :0E MAC DA to the CPU port(s) */
        mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK,
                   MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY));
index 08045b0..17e42d3 100644
@@ -66,6 +66,8 @@ enum mt753x_id {
 /* Registers for BPDU and PAE frame control */
 #define MT753X_BPC                     0x24
 #define  MT753X_BPDU_PORT_FW_MASK      GENMASK(2, 0)
+#define  MT753X_PAE_PORT_FW_MASK       GENMASK(18, 16)
+#define  MT753X_PAE_PORT_FW(x)         FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x)
 
 /* Register for :03 and :0E MAC DA frame control */
 #define MT753X_RGAC2                   0x2c
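MT753X_PAE_PORT_FW() above is a standard GENMASK()/FIELD_PREP() pairing: the mask names bits 18:16 of the BPC register and FIELD_PREP() shifts the forwarding action into place. A userspace sketch with stand-in macros (the real ones live in linux/bits.h and linux/bitfield.h):

#include <stdint.h>
#include <stdio.h>

#define GENMASK(h, l)	 (((~0u) >> (31 - (h))) & ((~0u) << (l)))
#define FIELD_PREP(m, v) (((uint32_t)(v) << __builtin_ctz(m)) & (m))

#define PAE_PORT_FW_MASK GENMASK(18, 16)
#define BPDU_CPU_ONLY	 1

int main(void)
{
	/* 1 placed into bits 18:16 -> 0x10000 */
	printf("0x%x\n", (unsigned)FIELD_PREP(PAE_PORT_FW_MASK, BPDU_CPU_ONLY));
	return 0;
}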
index 8b51756..7af2f08 100644
@@ -109,6 +109,13 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
                        usleep_range(1000, 2000);
        }
 
+       err = mv88e6xxx_read(chip, addr, reg, &data);
+       if (err)
+               return err;
+
+       if ((data & mask) == val)
+               return 0;
+
        dev_err(chip->dev, "Timeout while waiting for switch\n");
        return -ETIMEDOUT;
 }
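The extra read added to mv88e6xxx_wait_mask() above implements the "check once more after the deadline" polling pattern: if the loop exhausts its iterations because the task slept longer than planned between reads, one final sample decides success, so scheduling jitter cannot produce a false timeout. A compact sketch of the pattern:

#include <stdint.h>
#include <stdio.h>

static int wait_mask(uint16_t (*read_reg)(void), uint16_t mask,
		     uint16_t val, int tries)
{
	while (tries--)
		if ((read_reg() & mask) == val)
			return 0;

	/* final check before declaring a timeout */
	if ((read_reg() & mask) == val)
		return 0;
	return -1;
}

static uint16_t ready_reg(void) { return 0x8000; }

int main(void)
{
	/* even with zero polling iterations the final read succeeds */
	printf("%d\n", wait_mask(ready_reg, 0x8000, 0x8000, 0));
	return 0;
}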
@@ -3027,6 +3034,14 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
 
        /* If there is a GPIO connected to the reset pin, toggle it */
        if (gpiod) {
+               /* If the switch has just been reset and has not yet finished
+                * loading the EEPROM, toggling the reset line may interrupt
+                * the I2C transaction mid-byte, causing the first EEPROM read
+                * after the reset to come from the wrong location, so the
+                * switch boots into the wrong mode and is inoperable.
+                */
+               mv88e6xxx_g1_wait_eeprom_done(chip);
+
                gpiod_set_value_cansleep(gpiod, 1);
                usleep_range(10000, 20000);
                gpiod_set_value_cansleep(gpiod, 0);
index 70c0e2b..bef879c 100644
@@ -1286,7 +1286,6 @@ static int felix_parse_ports_node(struct felix *felix,
                if (err < 0) {
                        dev_info(dev, "Unsupported PHY mode %s on port %d\n",
                                 phy_modes(phy_mode), port);
-                       of_node_put(child);
 
                        /* Leave port_phy_modes[port] = 0, which is also
                         * PHY_INTERFACE_MODE_NA. This will perform a
@@ -1626,8 +1625,10 @@ static void felix_teardown(struct dsa_switch *ds)
        struct felix *felix = ocelot_to_felix(ocelot);
        struct dsa_port *dp;
 
+       rtnl_lock();
        if (felix->tag_proto_ops)
                felix->tag_proto_ops->teardown(ds);
+       rtnl_unlock();
 
        dsa_switch_for_each_available_port(dp, ds)
                ocelot_deinit_port(ocelot, dp->index);
@@ -1786,16 +1787,15 @@ static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 {
        struct ocelot *ocelot = ds->priv;
        struct ocelot_port *ocelot_port = ocelot->ports[port];
-       struct felix *felix = ocelot_to_felix(ocelot);
 
        ocelot_port_set_maxlen(ocelot, port, new_mtu);
 
-       mutex_lock(&ocelot->tas_lock);
+       mutex_lock(&ocelot->fwd_domain_lock);
 
-       if (ocelot_port->taprio && felix->info->tas_guard_bands_update)
-               felix->info->tas_guard_bands_update(ocelot, port);
+       if (ocelot_port->taprio && ocelot->ops->tas_guard_bands_update)
+               ocelot->ops->tas_guard_bands_update(ocelot, port);
 
-       mutex_unlock(&ocelot->tas_lock);
+       mutex_unlock(&ocelot->fwd_domain_lock);
 
        return 0;
 }
index 96008c0..1d4befe 100644
@@ -57,7 +57,6 @@ struct felix_info {
        void    (*mdio_bus_free)(struct ocelot *ocelot);
        int     (*port_setup_tc)(struct dsa_switch *ds, int port,
                                 enum tc_setup_type type, void *type_data);
-       void    (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
        void    (*port_sched_speed_set)(struct ocelot *ocelot, int port,
                                        u32 speed);
        void    (*phylink_mac_config)(struct ocelot *ocelot, int port,
index bb39fed..f16daa9 100644
@@ -1069,6 +1069,9 @@ static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns)
        if (gate_len_ns == U64_MAX)
                return U64_MAX;
 
+       if (gate_len_ns < VSC9959_TAS_MIN_GATE_LEN_NS)
+               return 0;
+
        return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC;
 }
 
@@ -1209,15 +1212,17 @@ static u32 vsc9959_tas_tc_max_sdu(struct tc_taprio_qopt_offload *taprio, int tc)
 static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
 {
        struct ocelot_port *ocelot_port = ocelot->ports[port];
+       struct ocelot_mm_state *mm = &ocelot->mm[port];
        struct tc_taprio_qopt_offload *taprio;
        u64 min_gate_len[OCELOT_NUM_TC];
+       u32 val, maxlen, add_frag_size;
+       u64 needed_min_frag_time_ps;
        int speed, picos_per_byte;
        u64 needed_bit_time_ps;
-       u32 val, maxlen;
        u8 tas_speed;
        int tc;
 
-       lockdep_assert_held(&ocelot->tas_lock);
+       lockdep_assert_held(&ocelot->fwd_domain_lock);
 
        taprio = ocelot_port->taprio;
 
@@ -1253,14 +1258,21 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
         */
        needed_bit_time_ps = (u64)(maxlen + 24) * picos_per_byte;
 
+       /* Preemptible TCs don't need to pass a full MTU; the port will
+        * automatically emit a HOLD request when a preemptible TC gate
+        * closes.
+        */
+       val = ocelot_read_rix(ocelot, QSYS_PREEMPTION_CFG, port);
+       add_frag_size = QSYS_PREEMPTION_CFG_MM_ADD_FRAG_SIZE_X(val);
+       needed_min_frag_time_ps = picos_per_byte *
+               (u64)(24 + 2 * ethtool_mm_frag_size_add_to_min(add_frag_size));
+
        dev_dbg(ocelot->dev,
-               "port %d: max frame size %d needs %llu ps at speed %d\n",
-               port, maxlen, needed_bit_time_ps, speed);
+               "port %d: max frame size %d needs %llu ps, %llu ps for mPackets at speed %d\n",
+               port, maxlen, needed_bit_time_ps, needed_min_frag_time_ps,
+               speed);
 
        vsc9959_tas_min_gate_lengths(taprio, min_gate_len);
 
-       mutex_lock(&ocelot->fwd_domain_lock);
-
        for (tc = 0; tc < OCELOT_NUM_TC; tc++) {
                u32 requested_max_sdu = vsc9959_tas_tc_max_sdu(taprio, tc);
                u64 remaining_gate_len_ps;
@@ -1269,7 +1281,9 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
                remaining_gate_len_ps =
                        vsc9959_tas_remaining_gate_len_ps(min_gate_len[tc]);
 
-               if (remaining_gate_len_ps > needed_bit_time_ps) {
+               if ((mm->active_preemptible_tcs & BIT(tc)) ?
+                   remaining_gate_len_ps > needed_min_frag_time_ps :
+                   remaining_gate_len_ps > needed_bit_time_ps) {
                        /* Setting QMAXSDU_CFG to 0 disables oversized frame
                         * dropping.
                         */
@@ -1323,8 +1337,6 @@ static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port)
        ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port);
 
        ocelot->ops->cut_through_fwd(ocelot);
-
-       mutex_unlock(&ocelot->fwd_domain_lock);
 }
 
 static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
@@ -1351,7 +1363,7 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
                break;
        }
 
-       mutex_lock(&ocelot->tas_lock);
+       mutex_lock(&ocelot->fwd_domain_lock);
 
        ocelot_rmw_rix(ocelot,
                       QSYS_TAG_CONFIG_LINK_SPEED(tas_speed),
@@ -1361,7 +1373,7 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
        if (ocelot_port->taprio)
                vsc9959_tas_guard_bands_update(ocelot, port);
 
-       mutex_unlock(&ocelot->tas_lock);
+       mutex_unlock(&ocelot->fwd_domain_lock);
 }
 
 static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time,
@@ -1409,7 +1421,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
        int ret, i;
        u32 val;
 
-       mutex_lock(&ocelot->tas_lock);
+       mutex_lock(&ocelot->fwd_domain_lock);
 
        if (taprio->cmd == TAPRIO_CMD_DESTROY) {
                ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
@@ -1421,7 +1433,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 
                vsc9959_tas_guard_bands_update(ocelot, port);
 
-               mutex_unlock(&ocelot->tas_lock);
+               mutex_unlock(&ocelot->fwd_domain_lock);
                return 0;
        } else if (taprio->cmd != TAPRIO_CMD_REPLACE) {
                ret = -EOPNOTSUPP;
@@ -1504,7 +1516,7 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
        ocelot_port->taprio = taprio_offload_get(taprio);
        vsc9959_tas_guard_bands_update(ocelot, port);
 
-       mutex_unlock(&ocelot->tas_lock);
+       mutex_unlock(&ocelot->fwd_domain_lock);
 
        return 0;
 
@@ -1512,7 +1524,7 @@ err_reset_tc:
        taprio->mqprio.qopt.num_tc = 0;
        ocelot_port_mqprio(ocelot, port, &taprio->mqprio);
 err_unlock:
-       mutex_unlock(&ocelot->tas_lock);
+       mutex_unlock(&ocelot->fwd_domain_lock);
 
        return ret;
 }
@@ -1525,7 +1537,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot)
        int port;
        u32 val;
 
-       mutex_lock(&ocelot->tas_lock);
+       mutex_lock(&ocelot->fwd_domain_lock);
 
        for (port = 0; port < ocelot->num_phys_ports; port++) {
                ocelot_port = ocelot->ports[port];
@@ -1563,7 +1575,7 @@ static void vsc9959_tas_clock_adjust(struct ocelot *ocelot)
                               QSYS_TAG_CONFIG_ENABLE,
                               QSYS_TAG_CONFIG, port);
        }
-       mutex_unlock(&ocelot->tas_lock);
+       mutex_unlock(&ocelot->fwd_domain_lock);
 }
 
 static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
@@ -1634,6 +1646,18 @@ static int vsc9959_qos_query_caps(struct tc_query_caps_base *base)
        }
 }
 
+static int vsc9959_qos_port_mqprio(struct ocelot *ocelot, int port,
+                                  struct tc_mqprio_qopt_offload *mqprio)
+{
+       int ret;
+
+       mutex_lock(&ocelot->fwd_domain_lock);
+       ret = ocelot_port_mqprio(ocelot, port, mqprio);
+       mutex_unlock(&ocelot->fwd_domain_lock);
+
+       return ret;
+}
+
 static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
                                 enum tc_setup_type type,
                                 void *type_data)
@@ -1646,7 +1670,7 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
        case TC_SETUP_QDISC_TAPRIO:
                return vsc9959_qos_port_tas_set(ocelot, port, type_data);
        case TC_SETUP_QDISC_MQPRIO:
-               return ocelot_port_mqprio(ocelot, port, type_data);
+               return vsc9959_qos_port_mqprio(ocelot, port, type_data);
        case TC_SETUP_QDISC_CBS:
                return vsc9959_qos_port_cbs_set(ds, port, type_data);
        default:
@@ -2591,6 +2615,7 @@ static const struct ocelot_ops vsc9959_ops = {
        .cut_through_fwd        = vsc9959_cut_through_fwd,
        .tas_clock_adjust       = vsc9959_tas_clock_adjust,
        .update_stats           = vsc9959_update_stats,
+       .tas_guard_bands_update = vsc9959_tas_guard_bands_update,
 };
 
 static const struct felix_info felix_info_vsc9959 = {
@@ -2616,7 +2641,6 @@ static const struct felix_info felix_info_vsc9959 = {
        .port_modes             = vsc9959_port_modes,
        .port_setup_tc          = vsc9959_port_setup_tc,
        .port_sched_speed_set   = vsc9959_sched_speed_set,
-       .tas_guard_bands_update = vsc9959_tas_guard_bands_update,
 };
 
 /* The INTB interrupt is shared between the PTP TX timestamp availability
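To put numbers on the mPacket guard-band math introduced in vsc9959_tas_guard_bands_update() above: at 1 Gbps a byte takes 8000 ps, and ethtool_mm_frag_size_add_to_min() maps addFragSize 0 to a 64-octet minimum fragment (assumed here), so a preemptible traffic class only has to fit 24 + 2 * 64 bytes rather than a full frame. A worked example:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t picos_per_byte = 8000;		/* 1 Gbps */
	uint32_t add_frag_size = 0;
	uint32_t min_frag = (add_frag_size + 1) * 64;
	uint64_t min_frag_time_ps =
		picos_per_byte * (uint64_t)(24 + 2 * min_frag);
	uint64_t full_frame_time_ps = (1518 + 24) * picos_per_byte;

	/* 1216000 ps vs 12336000 ps: preemptible gates can be ~10x shorter */
	printf("%llu %llu\n", (unsigned long long)min_frag_time_ps,
	       (unsigned long long)full_frame_time_ps);
	return 0;
}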
index b2bf78a..3b09370 100644
@@ -1002,6 +1002,8 @@ static const struct regmap_config ar9331_mdio_regmap_config = {
        .val_bits = 32,
        .reg_stride = 4,
        .max_register = AR9331_SW_REG_PAGE,
+       .use_single_read = true,
+       .use_single_write = true,
 
        .ranges = ar9331_regmap_range,
        .num_ranges = ARRAY_SIZE(ar9331_regmap_range),
@@ -1018,8 +1020,6 @@ static struct regmap_bus ar9331_sw_bus = {
        .val_format_endian_default = REGMAP_ENDIAN_NATIVE,
        .read = ar9331_mdio_read,
        .write = ar9331_sw_bus_write,
-       .max_raw_read = 4,
-       .max_raw_write = 4,
 };
 
 static int ar9331_sw_probe(struct mdio_device *mdiodev)
index f7d7cfb..efe9380 100644
@@ -576,8 +576,11 @@ static struct regmap_config qca8k_regmap_config = {
        .rd_table = &qca8k_readable_table,
        .disable_locking = true, /* Locking is handled by qca8k read/write */
        .cache_type = REGCACHE_NONE, /* Explicitly disable CACHE */
-       .max_raw_read = 32, /* mgmt eth can read/write up to 8 registers at time */
-       .max_raw_write = 32,
+       .max_raw_read = 32, /* mgmt eth can read up to 8 registers at a time */
+       /* ATU regs suffer from a bug where some data is not correctly
+        * written. Disable bulk writes so ATU entries are written correctly.
+        */
+       .use_single_write = true,
 };
 
 static int
@@ -588,6 +591,9 @@ qca8k_phy_eth_busy_wait(struct qca8k_mgmt_eth_data *mgmt_eth_data,
        bool ack;
        int ret;
 
+       if (!skb)
+               return -ENOMEM;
+
        reinit_completion(&mgmt_eth_data->rw_done);
 
        /* Increment seq_num and set it in the copy pkt */
index 8c2dc0e..13b8452 100644
@@ -244,7 +244,7 @@ void qca8k_fdb_flush(struct qca8k_priv *priv)
 }
 
 static int qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask,
-                                      const u8 *mac, u16 vid)
+                                      const u8 *mac, u16 vid, u8 aging)
 {
        struct qca8k_fdb fdb = { 0 };
        int ret;
@@ -261,10 +261,12 @@ static int qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask,
                goto exit;
 
        /* Rule exists. Delete it first. */
-       if (!fdb.aging) {
+       if (fdb.aging) {
                ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1);
                if (ret)
                        goto exit;
+       } else {
+               fdb.aging = aging;
        }
 
        /* Add port to fdb portmask */
@@ -291,6 +293,10 @@ static int qca8k_fdb_search_and_del(struct qca8k_priv *priv, u8 port_mask,
        if (ret < 0)
                goto exit;
 
+       ret = qca8k_fdb_read(priv, &fdb);
+       if (ret < 0)
+               goto exit;
+
        /* Rule doesn't exist. Why delete? */
        if (!fdb.aging) {
                ret = -EINVAL;
@@ -810,7 +816,11 @@ int qca8k_port_mdb_add(struct dsa_switch *ds, int port,
        const u8 *addr = mdb->addr;
        u16 vid = mdb->vid;
 
-       return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid);
+       if (!vid)
+               vid = QCA8K_PORT_VID_DEF;
+
+       return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid,
+                                          QCA8K_ATU_STATUS_STATIC);
 }
 
 int qca8k_port_mdb_del(struct dsa_switch *ds, int port,
@@ -821,6 +831,9 @@ int qca8k_port_mdb_del(struct dsa_switch *ds, int port,
        const u8 *addr = mdb->addr;
        u16 vid = mdb->vid;
 
+       if (!vid)
+               vid = QCA8K_PORT_VID_DEF;
+
        return qca8k_fdb_search_and_del(priv, BIT(port), addr, vid);
 }
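The inverted condition above is the substance of the fix: a non-zero aging field means the looked-up entry already exists and must be purged before being rewritten, while a zero field means a new entry is being created and must be seeded with the caller's aging (static, for MDB entries). A sketch of the corrected flow, with simplified types:

#include <stdio.h>

struct fdb { unsigned int aging, portmask; };

static void search_and_insert(struct fdb *fdb, unsigned int port_bit,
			      unsigned int aging)
{
	if (fdb->aging)			/* entry exists: purge, then re-add */
		printf("purge existing entry\n");
	else				/* new entry: seed requested aging */
		fdb->aging = aging;

	fdb->portmask |= port_bit;
	printf("write entry: aging=%u mask=0x%x\n", fdb->aging, fdb->portmask);
}

int main(void)
{
	struct fdb fdb = { 0 };

	search_and_insert(&fdb, 0x1, 0xf);	/* 0xf stands in for 'static' */
	search_and_insert(&fdb, 0x2, 0xf);
	return 0;
}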
 
index 451c3a1..633b321 100644
@@ -35,6 +35,8 @@
 
 #define ENA_REGS_ADMIN_INTR_MASK 1
 
+#define ENA_MAX_BACKOFF_DELAY_EXP 16U
+
 #define ENA_MIN_ADMIN_POLL_US 100
 
 #define ENA_MAX_ADMIN_POLL_US 5000
@@ -536,6 +538,7 @@ static int ena_com_comp_status_to_errno(struct ena_com_admin_queue *admin_queue,
 
 static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us)
 {
+       exp = min_t(u32, exp, ENA_MAX_BACKOFF_DELAY_EXP);
        delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us);
        delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US);
        usleep_range(delay_us, 2 * delay_us);
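The clamp added above matters because "delay_us * (1U << exp)" is evaluated before the min_t() range check, and in C a shift by 32 or more of a 32-bit value is undefined behaviour; capping the exponent keeps both the shift and the multiply well defined. A sketch:

#include <stdint.h>
#include <stdio.h>

#define MAX_BACKOFF_EXP	16u
#define MIN_POLL_US	100u
#define MAX_POLL_US	5000u

static uint32_t min_u32(uint32_t a, uint32_t b) { return a < b ? a : b; }
static uint32_t max_u32(uint32_t a, uint32_t b) { return a > b ? a : b; }

static uint32_t backoff_delay_us(uint32_t exp, uint32_t delay_us)
{
	exp = min_u32(exp, MAX_BACKOFF_EXP);	/* the added clamp */
	delay_us = max_u32(MIN_POLL_US, delay_us);
	return min_u32(delay_us * (1u << exp), MAX_POLL_US);
}

int main(void)
{
	/* 800 for a sane exponent; a runaway exp=50 still yields 5000 */
	printf("%u %u\n", backoff_delay_us(3, 100),
	       backoff_delay_us(50, 100));
	return 0;
}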
index 4a28879..940c5d1 100644
@@ -2094,8 +2094,11 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter,
                        real_len = (((unsigned char *)ip_hdr(skb) - skb->data)
                                        + ntohs(ip_hdr(skb)->tot_len));
 
-                       if (real_len < skb->len)
-                               pskb_trim(skb, real_len);
+                       if (real_len < skb->len) {
+                               err = pskb_trim(skb, real_len);
+                               if (err)
+                                       return err;
+                       }
 
                        hdr_len = skb_tcp_all_headers(skb);
                        if (unlikely(skb->len == hdr_len)) {
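The same three-line pattern recurs in the atl1e and atl1 hunks below: pskb_trim() can fail (it may need to reallocate a cloned skb), so its result must be propagated instead of ignored. For context, the trim exists because a padded minimum-size frame carries bytes beyond the IP datagram that must not reach the TSO engine; illustrative numbers:

#include <stdio.h>

int main(void)
{
	unsigned int skb_len = 60;	/* min Ethernet frame, no FCS */
	unsigned int eth_hdr_len = 14;
	unsigned int ip_tot_len = 40;	/* from the IP header */
	unsigned int real_len = eth_hdr_len + ip_tot_len;

	if (real_len < skb_len)		/* 6 bytes of padding to drop */
		printf("trim %u -> %u, then check the return value\n",
		       skb_len, real_len);
	return 0;
}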
index 5db0f34..5935be1 100644
@@ -1641,8 +1641,11 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter,
                        real_len = (((unsigned char *)ip_hdr(skb) - skb->data)
                                        + ntohs(ip_hdr(skb)->tot_len));
 
-                       if (real_len < skb->len)
-                               pskb_trim(skb, real_len);
+                       if (real_len < skb->len) {
+                               err = pskb_trim(skb, real_len);
+                               if (err)
+                                       return err;
+                       }
 
                        hdr_len = skb_tcp_all_headers(skb);
                        if (unlikely(skb->len == hdr_len)) {
index c8444bc..02aa6fd 100644
@@ -2113,8 +2113,11 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
 
                        real_len = (((unsigned char *)iph - skb->data) +
                                ntohs(iph->tot_len));
-                       if (real_len < skb->len)
-                               pskb_trim(skb, real_len);
+                       if (real_len < skb->len) {
+                               err = pskb_trim(skb, real_len);
+                               if (err)
+                                       return err;
+                       }
                        hdr_len = skb_tcp_all_headers(skb);
                        if (skb->len == hdr_len) {
                                iph->check = 0;
index 392ec09..3e4fb3c 100644
@@ -1793,11 +1793,9 @@ static int b44_nway_reset(struct net_device *dev)
        b44_readphy(bp, MII_BMCR, &bmcr);
        b44_readphy(bp, MII_BMCR, &bmcr);
        r = -EINVAL;
-       if (bmcr & BMCR_ANENABLE) {
-               b44_writephy(bp, MII_BMCR,
-                            bmcr | BMCR_ANRESTART);
-               r = 0;
-       }
+       if (bmcr & BMCR_ANENABLE)
+               r = b44_writephy(bp, MII_BMCR,
+                                bmcr | BMCR_ANRESTART);
        spin_unlock_irq(&bp->lock);
 
        return r;
index 1761df8..52ee375 100644
@@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac)
        int err;
 
        phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
-       if (!phy_dev || IS_ERR(phy_dev)) {
+       if (IS_ERR(phy_dev)) {
                dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
                return -ENODEV;
        }
@@ -1492,8 +1492,6 @@ int bgmac_enet_probe(struct bgmac *bgmac)
 
        bgmac->in_init = true;
 
-       bgmac_chip_intrs_off(bgmac);
-
        net_dev->irq = bgmac->irq;
        SET_NETDEV_DEV(net_dev, bgmac->dev);
        dev_set_drvdata(bgmac->dev, bgmac);
@@ -1511,6 +1509,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
         */
        bgmac_clk_enable(bgmac, 0);
 
+       bgmac_chip_intrs_off(bgmac);
+
        /* This seems to be fixing IRQ by assigning OOB #6 to the core */
        if (!(bgmac->feature_flags & BGMAC_FEAT_IDM_MASK)) {
                if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6)
index 8bcde0a..e2a4e10 100644
@@ -1508,6 +1508,8 @@ struct bnx2x {
        bool                    cnic_loaded;
        struct cnic_eth_dev     *(*cnic_probe)(struct net_device *);
 
+       bool                    nic_stopped;
+
        /* Flag that indicates that we can start looking for FCoE L2 queue
         * completions in the default status block.
         */
index 6ea5521..e9c1e1b 100644
@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
        bnx2x_add_all_napi(bp);
        DP(NETIF_MSG_IFUP, "napi added\n");
        bnx2x_napi_enable(bp);
+       bp->nic_stopped = false;
 
        if (IS_PF(bp)) {
                /* set pf load just before approaching the MCP */
@@ -2960,6 +2961,7 @@ load_error2:
 load_error1:
        bnx2x_napi_disable(bp);
        bnx2x_del_all_napi(bp);
+       bp->nic_stopped = true;
 
        /* clear pf_load status, as it was already set */
        if (IS_PF(bp))
@@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
                if (!CHIP_IS_E1x(bp))
                        bnx2x_pf_disable(bp);
 
-               /* Disable HW interrupts, NAPI */
-               bnx2x_netif_stop(bp, 1);
-               /* Delete all NAPI objects */
-               bnx2x_del_all_napi(bp);
-               if (CNIC_LOADED(bp))
-                       bnx2x_del_all_napi_cnic(bp);
-               /* Release IRQs */
-               bnx2x_free_irq(bp);
+               if (!bp->nic_stopped) {
+                       /* Disable HW interrupts, NAPI */
+                       bnx2x_netif_stop(bp, 1);
+                       /* Delete all NAPI objects */
+                       bnx2x_del_all_napi(bp);
+                       if (CNIC_LOADED(bp))
+                               bnx2x_del_all_napi_cnic(bp);
+                       /* Release IRQs */
+                       bnx2x_free_irq(bp);
+                       bp->nic_stopped = true;
+               }
 
                /* Report UNLOAD_DONE to MCP */
                bnx2x_send_unload_done(bp, false);
index 1e7a6f1..0d8e61c 100644
@@ -9474,15 +9474,18 @@ unload_error:
                }
        }
 
-       /* Disable HW interrupts, NAPI */
-       bnx2x_netif_stop(bp, 1);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-       if (CNIC_LOADED(bp))
-               bnx2x_del_all_napi_cnic(bp);
+       if (!bp->nic_stopped) {
+               /* Disable HW interrupts, NAPI */
+               bnx2x_netif_stop(bp, 1);
+               /* Delete all NAPI objects */
+               bnx2x_del_all_napi(bp);
+               if (CNIC_LOADED(bp))
+                       bnx2x_del_all_napi_cnic(bp);
 
-       /* Release IRQs */
-       bnx2x_free_irq(bp);
+               /* Release IRQs */
+               bnx2x_free_irq(bp);
+               bp->nic_stopped = true;
+       }
 
        /* Reset the chip, unless PCI function is offline. If we reach this
         * point following a PCI error handling, it means device is really
@@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
                }
                bnx2x_drain_tx_queues(bp);
                bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
-               bnx2x_netif_stop(bp, 1);
-               bnx2x_del_all_napi(bp);
+               if (!bp->nic_stopped) {
+                       bnx2x_netif_stop(bp, 1);
+                       bnx2x_del_all_napi(bp);
 
-               if (CNIC_LOADED(bp))
-                       bnx2x_del_all_napi_cnic(bp);
+                       if (CNIC_LOADED(bp))
+                               bnx2x_del_all_napi_cnic(bp);
 
-               bnx2x_free_irq(bp);
+                       bnx2x_free_irq(bp);
+                       bp->nic_stopped = true;
+               }
 
                /* Report UNLOAD_DONE to MCP */
                bnx2x_send_unload_done(bp, true);
index 0657a0f..8946a93 100644
@@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
        bnx2x_vfpf_finalize(bp, &req->first_tlv);
 
 free_irq:
-       /* Disable HW interrupts, NAPI */
-       bnx2x_netif_stop(bp, 0);
-       /* Delete all NAPI objects */
-       bnx2x_del_all_napi(bp);
-
-       /* Release IRQs */
-       bnx2x_free_irq(bp);
+       if (!bp->nic_stopped) {
+               /* Disable HW interrupts, NAPI */
+               bnx2x_netif_stop(bp, 0);
+               /* Delete all NAPI objects */
+               bnx2x_del_all_napi(bp);
+
+               /* Release IRQs */
+               bnx2x_free_irq(bp);
+               bp->nic_stopped = true;
+       }
 }
 
 static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
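All of the bnx2x hunks above thread one new flag through the teardown paths: unload, VF close and PCI error recovery can each reach the same IRQ/NAPI teardown, and nic_stopped makes that teardown idempotent so nothing is freed twice. The shape of the guard, as a minimal sketch:

#include <stdbool.h>
#include <stdio.h>

static bool nic_stopped;

static void stop_nic(void)
{
	if (nic_stopped)
		return;		/* another path already did the work */

	printf("disable IRQs, delete NAPI, free IRQs\n");
	nic_stopped = true;
}

int main(void)
{
	stop_nic();	/* performs the teardown */
	stop_nic();	/* no-op on the second path */
	return 0;
}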
index e5b54e6..1eb490c 100644
@@ -633,12 +633,13 @@ tx_kick_pending:
        return NETDEV_TX_OK;
 }
 
-static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 {
        struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
        struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
        u16 cons = txr->tx_cons;
        struct pci_dev *pdev = bp->pdev;
+       int nr_pkts = bnapi->tx_pkts;
        int i;
        unsigned int tx_bytes = 0;
 
@@ -688,6 +689,7 @@ next_tx_int:
                dev_kfree_skb_any(skb);
        }
 
+       bnapi->tx_pkts = 0;
        WRITE_ONCE(txr->tx_cons, cons);
 
        __netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
@@ -697,17 +699,24 @@ next_tx_int:
 
 static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
                                         struct bnxt_rx_ring_info *rxr,
+                                        unsigned int *offset,
                                         gfp_t gfp)
 {
        struct device *dev = &bp->pdev->dev;
        struct page *page;
 
-       page = page_pool_dev_alloc_pages(rxr->page_pool);
+       if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) {
+               page = page_pool_dev_alloc_frag(rxr->page_pool, offset,
+                                               BNXT_RX_PAGE_SIZE);
+       } else {
+               page = page_pool_dev_alloc_pages(rxr->page_pool);
+               *offset = 0;
+       }
        if (!page)
                return NULL;
 
-       *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
-                                     DMA_ATTR_WEAK_ORDERING);
+       *mapping = dma_map_page_attrs(dev, page, *offset, BNXT_RX_PAGE_SIZE,
+                                     bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
        if (dma_mapping_error(dev, *mapping)) {
                page_pool_recycle_direct(rxr->page_pool, page);
                return NULL;
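The branch above splits allocation by page size: when the CPU page is bigger than the ring buffer (for example 64K pages on some arm64/powerpc configs), one page is carved into page-pool fragments and each RX buffer records its offset; on 4K-page systems a whole page is used and the offset stays 0. Back-of-the-envelope, with BNXT_RX_PAGE_SIZE assumed to be 4K:

#include <stdio.h>

#define RX_BUF_SIZE	4096u	/* stands in for BNXT_RX_PAGE_SIZE */
#define PAGE_SZ		65536u	/* a 64K-page kernel */

int main(void)
{
	/* 16 buffers of 4096 bytes per 65536-byte page */
	printf("%u buffers of %u bytes per %u-byte page\n",
	       PAGE_SZ / RX_BUF_SIZE, RX_BUF_SIZE, PAGE_SZ);
	return 0;
}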
@@ -747,15 +756,16 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
        dma_addr_t mapping;
 
        if (BNXT_RX_PAGE_MODE(bp)) {
+               unsigned int offset;
                struct page *page =
-                       __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+                       __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
 
                if (!page)
                        return -ENOMEM;
 
                mapping += bp->rx_dma_offset;
                rx_buf->data = page;
-               rx_buf->data_ptr = page_address(page) + bp->rx_offset;
+               rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset;
        } else {
                u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp);
 
@@ -815,7 +825,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
        unsigned int offset = 0;
 
        if (BNXT_RX_PAGE_MODE(bp)) {
-               page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+               page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
 
                if (!page)
                        return -ENOMEM;
@@ -962,15 +972,15 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
                return NULL;
        }
        dma_addr -= bp->rx_dma_offset;
-       dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
-                            DMA_ATTR_WEAK_ORDERING);
-       skb = build_skb(page_address(page), PAGE_SIZE);
+       dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
+                            bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
+       skb = build_skb(data_ptr - bp->rx_offset, BNXT_RX_PAGE_SIZE);
        if (!skb) {
                page_pool_recycle_direct(rxr->page_pool, page);
                return NULL;
        }
        skb_mark_for_recycle(skb);
-       skb_reserve(skb, bp->rx_dma_offset);
+       skb_reserve(skb, bp->rx_offset);
        __skb_put(skb, len);
 
        return skb;
@@ -996,8 +1006,8 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
                return NULL;
        }
        dma_addr -= bp->rx_dma_offset;
-       dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
-                            DMA_ATTR_WEAK_ORDERING);
+       dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
+                            bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
 
        if (unlikely(!payload))
                payload = eth_get_headlen(bp->dev, data_ptr, len);
@@ -1010,7 +1020,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
 
        skb_mark_for_recycle(skb);
        off = (void *)data_ptr - page_address(page);
-       skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE);
+       skb_add_rx_frag(skb, 0, page, off, len, BNXT_RX_PAGE_SIZE);
        memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN,
               payload + NET_IP_ALIGN);
 
@@ -1141,7 +1151,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
 
        skb->data_len += total_frag_len;
        skb->len += total_frag_len;
-       skb->truesize += PAGE_SIZE * agg_bufs;
+       skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs;
        return skb;
 }
 
@@ -2569,12 +2579,11 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
        return rx_pkts;
 }
 
-static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
+static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi,
+                                 int budget)
 {
-       if (bnapi->tx_pkts) {
-               bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
-               bnapi->tx_pkts = 0;
-       }
+       if (bnapi->tx_pkts)
+               bnapi->tx_int(bp, bnapi, budget);
 
        if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) {
                struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
@@ -2603,7 +2612,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
         */
        bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons);
 
-       __bnxt_poll_work_done(bp, bnapi);
+       __bnxt_poll_work_done(bp, bnapi, budget);
        return rx_pkts;
 }
 
@@ -2734,7 +2743,7 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 }
 
 static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
-                                u64 dbr_type)
+                                u64 dbr_type, int budget)
 {
        struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
        int i;
@@ -2750,7 +2759,7 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
                        cpr2->had_work_done = 0;
                }
        }
-       __bnxt_poll_work_done(bp, bnapi);
+       __bnxt_poll_work_done(bp, bnapi, budget);
 }
 
 static int bnxt_poll_p5(struct napi_struct *napi, int budget)
@@ -2780,7 +2789,8 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
                        if (cpr->has_more_work)
                                break;
 
-                       __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL);
+                       __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL,
+                                            budget);
                        cpr->cp_raw_cons = raw_cons;
                        if (napi_complete_done(napi, work_done))
                                BNXT_DB_NQ_ARM_P5(&cpr->cp_db,
@@ -2810,7 +2820,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
                }
                raw_cons = NEXT_RAW_CMP(raw_cons);
        }
-       __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ);
+       __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, budget);
        if (raw_cons != cpr->cp_raw_cons) {
                cpr->cp_raw_cons = raw_cons;
                BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons);
@@ -2943,8 +2953,8 @@ skip_rx_tpa_free:
                rx_buf->data = NULL;
                if (BNXT_RX_PAGE_MODE(bp)) {
                        mapping -= bp->rx_dma_offset;
-                       dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
-                                            bp->rx_dir,
+                       dma_unmap_page_attrs(&pdev->dev, mapping,
+                                            BNXT_RX_PAGE_SIZE, bp->rx_dir,
                                             DMA_ATTR_WEAK_ORDERING);
                        page_pool_recycle_direct(rxr->page_pool, data);
                } else {
@@ -3213,6 +3223,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
        pp.napi = &rxr->bnapi->napi;
        pp.dev = &bp->pdev->dev;
        pp.dma_dir = DMA_BIDIRECTIONAL;
+       if (PAGE_SIZE > BNXT_RX_PAGE_SIZE)
+               pp.flags |= PP_FLAG_PAGE_FRAG;
 
        rxr->page_pool = page_pool_create(&pp);
        if (IS_ERR(rxr->page_pool)) {
@@ -3989,26 +4001,29 @@ void bnxt_set_ring_params(struct bnxt *bp)
  */
 int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
 {
+       struct net_device *dev = bp->dev;
+
        if (page_mode) {
                bp->flags &= ~BNXT_FLAG_AGG_RINGS;
                bp->flags |= BNXT_FLAG_RX_PAGE_MODE;
 
-               if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+               if (bp->xdp_prog->aux->xdp_has_frags)
+                       dev->max_mtu = min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
+               else
+                       dev->max_mtu =
+                               min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
+               if (dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
                        bp->flags |= BNXT_FLAG_JUMBO;
                        bp->rx_skb_func = bnxt_rx_multi_page_skb;
-                       bp->dev->max_mtu =
-                               min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
                } else {
                        bp->flags |= BNXT_FLAG_NO_AGG_RINGS;
                        bp->rx_skb_func = bnxt_rx_page_skb;
-                       bp->dev->max_mtu =
-                               min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
                }
                bp->rx_dir = DMA_BIDIRECTIONAL;
                /* Disable LRO or GRO_HW */
-               netdev_update_features(bp->dev);
+               netdev_update_features(dev);
        } else {
-               bp->dev->max_mtu = bp->max_mtu;
+               dev->max_mtu = bp->max_mtu;
                bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
                bp->rx_dir = DMA_FROM_DEVICE;
                bp->rx_skb_func = bnxt_rx_skb;
@@ -9429,6 +9444,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
                        cpr->sw_stats.rx.rx_resets++;
                bnapi->in_reset = false;
 
+               bnapi->tx_pkts = 0;
+
                if (bnapi->rx_ring) {
                        INIT_WORK(&cpr->dim.work, bnxt_dim_work);
                        cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
index 080e734..bb95c3d 100644
@@ -1005,7 +1005,7 @@ struct bnxt_napi {
        struct bnxt_tx_ring_info        *tx_ring;
 
        void                    (*tx_int)(struct bnxt *, struct bnxt_napi *,
-                                         int);
+                                         int budget);
        int                     tx_pkts;
        u8                      events;
 
index 4efa5fe..fb43232 100644
@@ -125,16 +125,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
        dma_unmap_len_set(tx_buf, len, 0);
 }
 
-void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 {
        struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
        struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
        bool rx_doorbell_needed = false;
+       int nr_pkts = bnapi->tx_pkts;
        struct bnxt_sw_tx_bd *tx_buf;
        u16 tx_cons = txr->tx_cons;
        u16 last_tx_cons = tx_cons;
        int i, j, frags;
 
+       if (!budget)
+               return;
+
        for (i = 0; i < nr_pkts; i++) {
                tx_buf = &txr->tx_buf_ring[tx_cons];
 
@@ -161,6 +165,8 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
                }
                tx_cons = NEXT_TX(tx_cons);
        }
+
+       bnapi->tx_pkts = 0;
        WRITE_ONCE(txr->tx_cons, tx_cons);
        if (rx_doorbell_needed) {
                tx_buf = &txr->tx_buf_ring[last_tx_cons];
@@ -180,8 +186,8 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
                        u16 cons, u8 *data_ptr, unsigned int len,
                        struct xdp_buff *xdp)
 {
+       u32 buflen = BNXT_RX_PAGE_SIZE;
        struct bnxt_sw_rx_bd *rx_buf;
-       u32 buflen = PAGE_SIZE;
        struct pci_dev *pdev;
        dma_addr_t mapping;
        u32 offset;
@@ -297,7 +303,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
                rx_buf = &rxr->rx_buf_ring[cons];
                mapping = rx_buf->mapping - bp->rx_dma_offset;
                dma_unmap_page_attrs(&pdev->dev, mapping,
-                                    PAGE_SIZE, bp->rx_dir,
+                                    BNXT_RX_PAGE_SIZE, bp->rx_dir,
                                     DMA_ATTR_WEAK_ORDERING);
 
                /* if we are unable to allocate a new buffer, abort and reuse */
@@ -480,7 +486,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
        }
        xdp_update_skb_shared_info(skb, num_frags,
                                   sinfo->xdp_frags_size,
-                                  PAGE_SIZE * sinfo->nr_frags,
+                                  BNXT_RX_PAGE_SIZE * sinfo->nr_frags,
                                   xdp_buff_is_frag_pfmemalloc(xdp));
        return skb;
 }
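The budget plumbing above exists because NAPI pollers can be entered with budget == 0 from contexts such as netpoll, where completion work that frees page-pool backed frames is not safe; bnxt_tx_int_xdp() therefore bails out early and leaves bnapi->tx_pkts intact for the next real poll. A sketch of the control flow:

#include <stdio.h>

static int tx_pkts = 3;

static void tx_int_xdp(int budget)
{
	if (!budget)
		return;		/* budget 0: defer the completion work */

	printf("completing %d packets\n", tx_pkts);
	tx_pkts = 0;		/* cleared here, not in the caller */
}

int main(void)
{
	tx_int_xdp(0);		/* no-op, tx_pkts stays 3 */
	tx_int_xdp(64);		/* normal NAPI poll */
	return 0;
}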
index ea430d6..5e412c5 100644
@@ -16,7 +16,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
                                   struct bnxt_tx_ring_info *txr,
                                   dma_addr_t mapping, u32 len,
                                   struct xdp_buff *xdp);
-void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
+void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget);
 bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
                 struct xdp_buff xdp, struct page *page, u8 **data_ptr,
                 unsigned int *len, u8 *event);
index 0092e46..cc3afb6 100644
@@ -617,7 +617,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
                };
 
                phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
-               if (!phydev || IS_ERR(phydev)) {
+               if (IS_ERR(phydev)) {
                        dev_err(kdev, "failed to register fixed PHY device\n");
                        return -ENODEV;
                }
index 5ef073a..cb2810f 100644
@@ -6881,7 +6881,10 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 
                        ri->data = NULL;
 
-                       skb = build_skb(data, frag_size);
+                       if (frag_size)
+                               skb = build_skb(data, frag_size);
+                       else
+                               skb = slab_build_skb(data);
                        if (!skb) {
                                tg3_frag_free(frag_size != 0, data);
                                goto drop_it_no_recycle;
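tg3 backs some rings with kmalloc() buffers, signalled by frag_size == 0, and slab memory must be wrapped with slab_build_skb() rather than build_skb(); the new branch simply keys the skb constructor off the allocator. A one-screen sketch of the dispatch:

#include <stdio.h>

static const char *skb_ctor(unsigned int frag_size)
{
	/* frag_size == 0 means the buffer came from the slab allocator */
	return frag_size ? "build_skb" : "slab_build_skb";
}

int main(void)
{
	printf("frag ring: %s\n", skb_ctor(2048));
	printf("slab ring: %s\n", skb_ctor(0));
	return 0;
}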
index 04ad0f2..7246e13 100644
@@ -512,11 +512,6 @@ bnad_debugfs_init(struct bnad *bnad)
        if (!bnad->port_debugfs_root) {
                bnad->port_debugfs_root =
                        debugfs_create_dir(name, bna_debugfs_root);
-               if (!bnad->port_debugfs_root) {
-                       netdev_warn(bnad->netdev,
-                                   "debugfs root dir creation failed\n");
-                       return;
-               }
 
                atomic_inc(&bna_debugfs_port_count);
 
index f6a0f12..82929ee 100644
@@ -5194,6 +5194,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
        unsigned int q;
        int err;
 
+       if (!device_may_wakeup(&bp->dev->dev))
+               phy_exit(bp->sgmii_phy);
+
        if (!netif_running(netdev))
                return 0;
 
@@ -5254,7 +5257,6 @@ static int __maybe_unused macb_suspend(struct device *dev)
        if (!(bp->wol & MACB_WOL_ENABLED)) {
                rtnl_lock();
                phylink_stop(bp->phylink);
-               phy_exit(bp->sgmii_phy);
                rtnl_unlock();
                spin_lock_irqsave(&bp->lock, flags);
                macb_reset_hw(bp);
@@ -5284,6 +5286,9 @@ static int __maybe_unused macb_resume(struct device *dev)
        unsigned int q;
        int err;
 
+       if (!device_may_wakeup(&bp->dev->dev))
+               phy_init(bp->sgmii_phy);
+
        if (!netif_running(netdev))
                return 0;
 
@@ -5344,8 +5349,6 @@ static int __maybe_unused macb_resume(struct device *dev)
        macb_set_rx_mode(netdev);
        macb_restore_features(bp);
        rtnl_lock();
-       if (!device_may_wakeup(&bp->dev->dev))
-               phy_init(bp->sgmii_phy);
 
        phylink_start(bp->phylink);
        rtnl_unlock();
index c2e7037..7750702 100644
@@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
        tp->write_seq = snd_isn;
        tp->snd_nxt = snd_isn;
        tp->snd_una = snd_isn;
-       inet_sk(sk)->inet_id = get_random_u16();
+       atomic_set(&inet_sk(sk)->inet_id, get_random_u16());
        assign_rxopt(sk, opt);
 
        if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
index 18c2fc8..0616b5f 100644
@@ -1138,7 +1138,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
            (lancer_chip(adapter) || BE3_chip(adapter) ||
             skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
                ip = (struct iphdr *)ip_hdr(skb);
-               pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
+               if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
+                       goto tx_drop;
        }
 
        /* If vlan tag is already inlined in the packet, skip HW VLAN
index 1416262..e0a4cb7 100644
@@ -1186,14 +1186,9 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
 
 static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
 {
-       struct device_node *node = pdev->dev.of_node;
        struct platform_device *ierb_pdev;
        struct device_node *ierb_node;
 
-       /* Don't register with the IERB if the PF itself is disabled */
-       if (!node || !of_device_is_available(node))
-               return 0;
-
        ierb_node = of_find_compatible_node(NULL, NULL,
                                            "fsl,ls1028a-enetc-ierb");
        if (!ierb_node || !of_device_is_available(ierb_node))
@@ -1208,56 +1203,81 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
        return enetc_ierb_register_pf(ierb_pdev, pdev);
 }
 
-static int enetc_pf_probe(struct pci_dev *pdev,
-                         const struct pci_device_id *ent)
+static struct enetc_si *enetc_psi_create(struct pci_dev *pdev)
 {
-       struct device_node *node = pdev->dev.of_node;
-       struct enetc_ndev_priv *priv;
-       struct net_device *ndev;
        struct enetc_si *si;
-       struct enetc_pf *pf;
        int err;
 
-       err = enetc_pf_register_with_ierb(pdev);
-       if (err == -EPROBE_DEFER)
-               return err;
-       if (err)
-               dev_warn(&pdev->dev,
-                        "Could not register with IERB driver: %pe, please update the device tree\n",
-                        ERR_PTR(err));
-
-       err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
-       if (err)
-               return dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+       err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf));
+       if (err) {
+               dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+               goto out;
+       }
 
        si = pci_get_drvdata(pdev);
        if (!si->hw.port || !si->hw.global) {
                err = -ENODEV;
                dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
-               goto err_map_pf_space;
+               goto out_pci_remove;
        }
 
        err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
                               &si->cbd_ring);
        if (err)
-               goto err_setup_cbdr;
+               goto out_pci_remove;
 
        err = enetc_init_port_rfs_memory(si);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
-               goto err_init_port_rfs;
+               goto out_teardown_cbdr;
        }
 
        err = enetc_init_port_rss_memory(si);
        if (err) {
                dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
-               goto err_init_port_rss;
+               goto out_teardown_cbdr;
        }
 
-       if (node && !of_device_is_available(node)) {
-               dev_info(&pdev->dev, "device is disabled, skipping\n");
-               err = -ENODEV;
-               goto err_device_disabled;
+       return si;
+
+out_teardown_cbdr:
+       enetc_teardown_cbdr(&si->cbd_ring);
+out_pci_remove:
+       enetc_pci_remove(pdev);
+out:
+       return ERR_PTR(err);
+}
+
+static void enetc_psi_destroy(struct pci_dev *pdev)
+{
+       struct enetc_si *si = pci_get_drvdata(pdev);
+
+       enetc_teardown_cbdr(&si->cbd_ring);
+       enetc_pci_remove(pdev);
+}
+
+static int enetc_pf_probe(struct pci_dev *pdev,
+                         const struct pci_device_id *ent)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct enetc_ndev_priv *priv;
+       struct net_device *ndev;
+       struct enetc_si *si;
+       struct enetc_pf *pf;
+       int err;
+
+       err = enetc_pf_register_with_ierb(pdev);
+       if (err == -EPROBE_DEFER)
+               return err;
+       if (err)
+               dev_warn(&pdev->dev,
+                        "Could not register with IERB driver: %pe, please update the device tree\n",
+                        ERR_PTR(err));
+
+       si = enetc_psi_create(pdev);
+       if (IS_ERR(si)) {
+               err = PTR_ERR(si);
+               goto err_psi_create;
        }
 
        pf = enetc_si_priv(si);
@@ -1339,15 +1359,9 @@ err_alloc_si_res:
        si->ndev = NULL;
        free_netdev(ndev);
 err_alloc_netdev:
-err_init_port_rss:
-err_init_port_rfs:
-err_device_disabled:
 err_setup_mac_addresses:
-       enetc_teardown_cbdr(&si->cbd_ring);
-err_setup_cbdr:
-err_map_pf_space:
-       enetc_pci_remove(pdev);
-
+       enetc_psi_destroy(pdev);
+err_psi_create:
        return err;
 }
 
@@ -1370,12 +1384,29 @@ static void enetc_pf_remove(struct pci_dev *pdev)
        enetc_free_msix(priv);
 
        enetc_free_si_resources(priv);
-       enetc_teardown_cbdr(&si->cbd_ring);
 
        free_netdev(si->ndev);
 
-       enetc_pci_remove(pdev);
+       enetc_psi_destroy(pdev);
+}
+
+static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
+{
+       struct device_node *node = pdev->dev.of_node;
+       struct enetc_si *si;
+
+       /* Only apply quirk for disabled functions. For the ones
+        * that are enabled, enetc_pf_probe() will apply it.
+        */
+       if (node && of_device_is_available(node))
+               return;
+
+       si = enetc_psi_create(pdev);
+       if (!IS_ERR(si))
+               enetc_psi_destroy(pdev);
 }
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
+                       enetc_fixup_clear_rss_rfs);
 
 static const struct pci_device_id enetc_pf_id_table[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
index 9939cca..63a053d 100644
@@ -355,7 +355,7 @@ struct bufdesc_ex {
 #define RX_RING_SIZE           (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES)
 #define FEC_ENET_TX_FRSIZE     2048
 #define FEC_ENET_TX_FRPPG      (PAGE_SIZE / FEC_ENET_TX_FRSIZE)
-#define TX_RING_SIZE           512     /* Must be power of two */
+#define TX_RING_SIZE           1024    /* Must be power of two */
 #define TX_RING_MOD_MASK       511     /*   for this to work */
 
 #define BD_ENET_RX_INT         0x00800000
@@ -544,10 +544,23 @@ enum {
        XDP_STATS_TOTAL,
 };
 
+enum fec_txbuf_type {
+       FEC_TXBUF_T_SKB,
+       FEC_TXBUF_T_XDP_NDO,
+};
+
+struct fec_tx_buffer {
+       union {
+               struct sk_buff *skb;
+               struct xdp_frame *xdp;
+       };
+       enum fec_txbuf_type type;
+};
+
 struct fec_enet_priv_tx_q {
        struct bufdesc_prop bd;
        unsigned char *tx_bounce[TX_RING_SIZE];
-       struct  sk_buff *tx_skbuff[TX_RING_SIZE];
+       struct fec_tx_buffer tx_buf[TX_RING_SIZE];
 
        unsigned short tx_stop_threshold;
        unsigned short tx_wake_threshold;
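The new fec_tx_buffer above is a tagged union: a ring slot now holds either an skb or an XDP frame, and the type field tells the completion path which free routine to call. A self-contained sketch of the bookkeeping (void pointers stand in for the kernel types):

#include <stdio.h>

enum txbuf_type { TXBUF_SKB, TXBUF_XDP_NDO };

struct tx_buffer {
	union {
		void *skb;	/* struct sk_buff * in the driver */
		void *xdp;	/* struct xdp_frame * in the driver */
	};
	enum txbuf_type type;
};

static void complete_tx(const struct tx_buffer *b)
{
	if (b->type == TXBUF_SKB)
		printf("dev_kfree_skb_any(%p)\n", b->skb);
	else
		printf("xdp_return_frame(%p)\n", b->xdp);
}

int main(void)
{
	struct tx_buffer a = { .skb = (void *)0x1, .type = TXBUF_SKB };
	struct tx_buffer b = { .xdp = (void *)0x2, .type = TXBUF_XDP_NDO };

	complete_tx(&a);
	complete_tx(&b);
	return 0;
}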
index 8fbe477..66b5cbd 100644
@@ -397,7 +397,7 @@ static void fec_dump(struct net_device *ndev)
                        fec16_to_cpu(bdp->cbd_sc),
                        fec32_to_cpu(bdp->cbd_bufaddr),
                        fec16_to_cpu(bdp->cbd_datlen),
-                       txq->tx_skbuff[index]);
+                       txq->tx_buf[index].skb);
                bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
                index++;
        } while (bdp != txq->bd.base);
@@ -654,7 +654,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 
        index = fec_enet_get_bd_index(last_bdp, &txq->bd);
        /* Save skb pointer */
-       txq->tx_skbuff[index] = skb;
+       txq->tx_buf[index].skb = skb;
 
        /* Make sure the updates to rest of the descriptor are performed before
         * transferring ownership.
@@ -672,9 +672,7 @@ static int fec_enet_txq_submit_skb(struct fec_enet_priv_tx_q *txq,
 
        skb_tx_timestamp(skb);
 
-       /* Make sure the update to bdp and tx_skbuff are performed before
-        * txq->bd.cur.
-        */
+       /* Make sure the update to bdp is performed before txq->bd.cur. */
        wmb();
        txq->bd.cur = bdp;
 
@@ -862,7 +860,7 @@ static int fec_enet_txq_submit_tso(struct fec_enet_priv_tx_q *txq,
        }
 
        /* Save skb pointer */
-       txq->tx_skbuff[index] = skb;
+       txq->tx_buf[index].skb = skb;
 
        skb_tx_timestamp(skb);
        txq->bd.cur = bdp;
@@ -952,16 +950,33 @@ static void fec_enet_bd_init(struct net_device *dev)
                for (i = 0; i < txq->bd.ring_size; i++) {
                        /* Initialize the BD for every fragment in the page. */
                        bdp->cbd_sc = cpu_to_fec16(0);
-                       if (bdp->cbd_bufaddr &&
-                           !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
-                               dma_unmap_single(&fep->pdev->dev,
-                                                fec32_to_cpu(bdp->cbd_bufaddr),
-                                                fec16_to_cpu(bdp->cbd_datlen),
-                                                DMA_TO_DEVICE);
-                       if (txq->tx_skbuff[i]) {
-                               dev_kfree_skb_any(txq->tx_skbuff[i]);
-                               txq->tx_skbuff[i] = NULL;
+                       if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) {
+                               if (bdp->cbd_bufaddr &&
+                                   !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                                       dma_unmap_single(&fep->pdev->dev,
+                                                        fec32_to_cpu(bdp->cbd_bufaddr),
+                                                        fec16_to_cpu(bdp->cbd_datlen),
+                                                        DMA_TO_DEVICE);
+                               if (txq->tx_buf[i].skb) {
+                                       dev_kfree_skb_any(txq->tx_buf[i].skb);
+                                       txq->tx_buf[i].skb = NULL;
+                               }
+                       } else {
+                               if (bdp->cbd_bufaddr)
+                                       dma_unmap_single(&fep->pdev->dev,
+                                                        fec32_to_cpu(bdp->cbd_bufaddr),
+                                                        fec16_to_cpu(bdp->cbd_datlen),
+                                                        DMA_TO_DEVICE);
+
+                               if (txq->tx_buf[i].xdp) {
+                                       xdp_return_frame(txq->tx_buf[i].xdp);
+                                       txq->tx_buf[i].xdp = NULL;
+                               }
+
+                               /* restore default tx buffer type: FEC_TXBUF_T_SKB */
+                               txq->tx_buf[i].type = FEC_TXBUF_T_SKB;
                        }
+
                        bdp->cbd_bufaddr = cpu_to_fec32(0);
                        bdp = fec_enet_get_nextdesc(bdp, &txq->bd);
                }
@@ -1357,9 +1372,10 @@ fec_enet_hwtstamp(struct fec_enet_private *fep, unsigned ts,
 }
 
 static void
-fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
 {
        struct  fec_enet_private *fep;
+       struct xdp_frame *xdpf;
        struct bufdesc *bdp;
        unsigned short status;
        struct  sk_buff *skb;
@@ -1387,16 +1403,39 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 
                index = fec_enet_get_bd_index(bdp, &txq->bd);
 
-               skb = txq->tx_skbuff[index];
-               txq->tx_skbuff[index] = NULL;
-               if (!IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
-                       dma_unmap_single(&fep->pdev->dev,
-                                        fec32_to_cpu(bdp->cbd_bufaddr),
-                                        fec16_to_cpu(bdp->cbd_datlen),
-                                        DMA_TO_DEVICE);
-               bdp->cbd_bufaddr = cpu_to_fec32(0);
-               if (!skb)
-                       goto skb_done;
+               if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) {
+                       skb = txq->tx_buf[index].skb;
+                       txq->tx_buf[index].skb = NULL;
+                       if (bdp->cbd_bufaddr &&
+                           !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+                               dma_unmap_single(&fep->pdev->dev,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
+                                                fec16_to_cpu(bdp->cbd_datlen),
+                                                DMA_TO_DEVICE);
+                       bdp->cbd_bufaddr = cpu_to_fec32(0);
+                       if (!skb)
+                               goto tx_buf_done;
+               } else {
+                       /* Tx processing cannot call any XDP (or page pool)
+                        * APIs if the "budget" is 0. A NAPI budget of 0
+                        * (such as from netpoll) indicates we may be in IRQ
+                        * context, and the page pool cannot be used from IRQ
+                        * context.
+                        */
+                       if (unlikely(!budget))
+                               break;
+
+                       xdpf = txq->tx_buf[index].xdp;
+                       if (bdp->cbd_bufaddr)
+                               dma_unmap_single(&fep->pdev->dev,
+                                                fec32_to_cpu(bdp->cbd_bufaddr),
+                                                fec16_to_cpu(bdp->cbd_datlen),
+                                                DMA_TO_DEVICE);
+                       bdp->cbd_bufaddr = cpu_to_fec32(0);
+                       if (!xdpf) {
+                               txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
+                               goto tx_buf_done;
+                       }
+               }
 
                /* Check for errors. */
                if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -1415,21 +1454,11 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
                                ndev->stats.tx_carrier_errors++;
                } else {
                        ndev->stats.tx_packets++;
-                       ndev->stats.tx_bytes += skb->len;
-               }
 
-               /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
-                * are to time stamp the packet, so we still need to check time
-                * stamping enabled flag.
-                */
-               if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
-                            fep->hwts_tx_en) &&
-                   fep->bufdesc_ex) {
-                       struct skb_shared_hwtstamps shhwtstamps;
-                       struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
-
-                       fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
-                       skb_tstamp_tx(skb, &shhwtstamps);
+                       if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB)
+                               ndev->stats.tx_bytes += skb->len;
+                       else
+                               ndev->stats.tx_bytes += xdpf->len;
                }
 
                /* Deferred means some collisions occurred during transmit,
@@ -1438,10 +1467,32 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
                if (status & BD_ENET_TX_DEF)
                        ndev->stats.collisions++;
 
-               /* Free the sk buffer associated with this last transmit */
-               dev_kfree_skb_any(skb);
-skb_done:
-               /* Make sure the update to bdp and tx_skbuff are performed
+               if (txq->tx_buf[index].type == FEC_TXBUF_T_SKB) {
+                       /* NOTE: SKBTX_IN_PROGRESS being set does not imply it's we who
+                        * are to time stamp the packet, so we still need to check time
+                        * stamping enabled flag.
+                        */
+                       if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS &&
+                                    fep->hwts_tx_en) && fep->bufdesc_ex) {
+                               struct skb_shared_hwtstamps shhwtstamps;
+                               struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
+
+                               fec_enet_hwtstamp(fep, fec32_to_cpu(ebdp->ts), &shhwtstamps);
+                               skb_tstamp_tx(skb, &shhwtstamps);
+                       }
+
+                       /* Free the sk buffer associated with this last transmit */
+                       dev_kfree_skb_any(skb);
+               } else {
+                       xdp_return_frame(xdpf);
+
+                       txq->tx_buf[index].xdp = NULL;
+                       /* restore default tx buffer type: FEC_TXBUF_T_SKB */
+                       txq->tx_buf[index].type = FEC_TXBUF_T_SKB;
+               }
+
+tx_buf_done:
+               /* Make sure the updates to bdp and tx_buf are performed
                 * before dirty_tx
                 */
                wmb();
@@ -1465,14 +1516,14 @@ skb_done:
                writel(0, txq->bd.reg_desc_active);
 }
 
-static void fec_enet_tx(struct net_device *ndev)
+static void fec_enet_tx(struct net_device *ndev, int budget)
 {
        struct fec_enet_private *fep = netdev_priv(ndev);
        int i;
 
        /* Make sure that AVB queues are processed first. */
        for (i = fep->num_tx_queues - 1; i >= 0; i--)
-               fec_enet_tx_queue(ndev, i);
+               fec_enet_tx_queue(ndev, i, budget);
 }
 
 static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
@@ -1815,7 +1866,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
 
        do {
                done += fec_enet_rx(ndev, budget - done);
-               fec_enet_tx(ndev);
+               fec_enet_tx(ndev, budget);
        } while ((done < budget) && fec_enet_collect_events(fep));
 
        if (done < budget) {
@@ -3249,9 +3300,19 @@ static void fec_enet_free_buffers(struct net_device *ndev)
                for (i = 0; i < txq->bd.ring_size; i++) {
                        kfree(txq->tx_bounce[i]);
                        txq->tx_bounce[i] = NULL;
-                       skb = txq->tx_skbuff[i];
-                       txq->tx_skbuff[i] = NULL;
-                       dev_kfree_skb(skb);
+
+                       if (txq->tx_buf[i].type == FEC_TXBUF_T_SKB) {
+                               skb = txq->tx_buf[i].skb;
+                               txq->tx_buf[i].skb = NULL;
+                               dev_kfree_skb(skb);
+                       } else {
+                               if (txq->tx_buf[i].xdp) {
+                                       xdp_return_frame(txq->tx_buf[i].xdp);
+                                       txq->tx_buf[i].xdp = NULL;
+                               }
+
+                               txq->tx_buf[i].type = FEC_TXBUF_T_SKB;
+                       }
                }
        }
 }
@@ -3296,8 +3357,7 @@ static int fec_enet_alloc_queue(struct net_device *ndev)
                fep->total_tx_ring_size += fep->tx_queue[i]->bd.ring_size;
 
                txq->tx_stop_threshold = FEC_MAX_SKB_DESCS;
-               txq->tx_wake_threshold =
-                       (txq->bd.ring_size - txq->tx_stop_threshold) / 2;
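+               /* Wake the queue once there is headroom for two more
+                * maximally fragmented frames beyond the stop threshold,
+                * rather than at half of the remaining ring.
+                */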
+               txq->tx_wake_threshold = FEC_MAX_SKB_DESCS + 2 * MAX_SKB_FRAGS;
 
                txq->tso_hdrs = dma_alloc_coherent(&fep->pdev->dev,
                                        txq->bd.ring_size * TSO_HEADER_SIZE,
@@ -3732,12 +3792,18 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf)
                if (fep->quirks & FEC_QUIRK_SWAP_FRAME)
                        return -EOPNOTSUPP;
 
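+               /* Stop advertising ndo_xdp_xmit as a redirect target while
+                * no program is attached; it is re-advertised below once the
+                * new program is installed.
+                */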
+               if (!bpf->prog)
+                       xdp_features_clear_redirect_target(dev);
+
                if (is_run) {
                        napi_disable(&fep->napi);
                        netif_tx_disable(dev);
                }
 
                old_prog = xchg(&fep->xdp_prog, bpf->prog);
+               if (old_prog)
+                       bpf_prog_put(old_prog);
+
                fec_restart(dev);
 
                if (is_run) {
@@ -3745,8 +3811,8 @@ static int fec_enet_bpf(struct net_device *dev, struct netdev_bpf *bpf)
                        netif_tx_start_all_queues(dev);
                }
 
-               if (old_prog)
-                       bpf_prog_put(old_prog);
+               if (bpf->prog)
+                       xdp_features_set_redirect_target(dev, false);
 
                return 0;
 
@@ -3778,7 +3844,7 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
 
        entries_free = fec_enet_get_free_txdesc_num(txq);
        if (entries_free < MAX_SKB_FRAGS + 1) {
-               netdev_err(fep->netdev, "NOT enough BD for SG!\n");
+               netdev_err_once(fep->netdev, "NOT enough BD for SG!\n");
                return -EBUSY;
        }
 
@@ -3811,7 +3877,8 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
                ebdp->cbd_esc = cpu_to_fec32(estatus);
        }
 
-       txq->tx_skbuff[index] = NULL;
+       txq->tx_buf[index].type = FEC_TXBUF_T_XDP_NDO;
+       txq->tx_buf[index].xdp = frame;
 
        /* Make sure the updates to rest of the descriptor are performed before
         * transferring ownership.
@@ -3857,6 +3924,8 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
 
        __netif_tx_lock(nq, cpu);
 
+       /* Avoid tx timeout as XDP shares the queue with kernel stack */
+       txq_trans_cond_update(nq);
        for (i = 0; i < num_frames; i++) {
                if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
                        break;
@@ -4016,8 +4085,7 @@ static int fec_enet_init(struct net_device *ndev)
 
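+       /* NETDEV_XDP_ACT_NDO_XMIT is advertised dynamically from
+        * fec_enet_bpf() while an XDP program is attached.
+        */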
        if (!(fep->quirks & FEC_QUIRK_SWAP_FRAME))
                ndev->xdp_features = NETDEV_XDP_ACT_BASIC |
-                                    NETDEV_XDP_ACT_REDIRECT |
-                                    NETDEV_XDP_ACT_NDO_XMIT;
+                                    NETDEV_XDP_ACT_REDIRECT;
 
        fec_restart(ndev);
 
index 98eb78d..4b425bf 100644 (file)
@@ -964,5 +964,6 @@ void gve_handle_report_stats(struct gve_priv *priv);
 /* exported by ethtool.c */
 extern const struct ethtool_ops gve_ethtool_ops;
 /* needed by ethtool */
+extern char gve_driver_name[];
 extern const char gve_version_str[];
 #endif /* _GVE_H_ */
index cfd4b8d..233e594 100644 (file)
@@ -15,7 +15,7 @@ static void gve_get_drvinfo(struct net_device *netdev,
 {
        struct gve_priv *priv = netdev_priv(netdev);
 
-       strscpy(info->driver, "gve", sizeof(info->driver));
+       strscpy(info->driver, gve_driver_name, sizeof(info->driver));
        strscpy(info->version, gve_version_str, sizeof(info->version));
        strscpy(info->bus_info, pci_name(priv->pdev), sizeof(info->bus_info));
 }
@@ -590,6 +590,9 @@ static int gve_get_link_ksettings(struct net_device *netdev,
                err = gve_adminq_report_link_speed(priv);
 
        cmd->base.speed = priv->link_speed;
+
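+       /* The virtual NIC link is always full duplex. */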
+       cmd->base.duplex = DUPLEX_FULL;
+
        return err;
 }
 
index 8fb70db..e6f1711 100644 (file)
@@ -33,6 +33,7 @@
 #define MIN_TX_TIMEOUT_GAP (1000 * 10)
 #define DQO_TX_MAX     0x3FFFF
 
+char gve_driver_name[] = "gve";
 const char gve_version_str[] = GVE_VERSION;
 static const char gve_version_prefix[] = GVE_VERSION_PREFIX;
 
@@ -2200,7 +2201,7 @@ static int gve_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (err)
                return err;
 
-       err = pci_request_regions(pdev, "gvnic-cfg");
+       err = pci_request_regions(pdev, gve_driver_name);
        if (err)
                goto abort_with_enabled;
 
@@ -2393,8 +2394,8 @@ static const struct pci_device_id gve_id_table[] = {
        { }
 };
 
-static struct pci_driver gvnic_driver = {
-       .name           = "gvnic",
+static struct pci_driver gve_driver = {
+       .name           = gve_driver_name,
        .id_table       = gve_id_table,
        .probe          = gve_probe,
        .remove         = gve_remove,
@@ -2405,10 +2406,10 @@ static struct pci_driver gvnic_driver = {
 #endif
 };
 
-module_pci_driver(gvnic_driver);
+module_pci_driver(gve_driver);
 
 MODULE_DEVICE_TABLE(pci, gve_id_table);
 MODULE_AUTHOR("Google, Inc.");
-MODULE_DESCRIPTION("gVNIC Driver");
+MODULE_DESCRIPTION("Google Virtual NIC Driver");
 MODULE_LICENSE("Dual MIT/GPL");
 MODULE_VERSION(GVE_VERSION);
index b99d752..514a20b 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/pkt_sched.h>
 #include <linux/types.h>
+#include <linux/bitmap.h>
 #include <net/pkt_cls.h>
 #include <net/pkt_sched.h>
 
@@ -101,6 +102,7 @@ enum HNAE3_DEV_CAP_BITS {
        HNAE3_DEV_SUPPORT_FEC_STATS_B,
        HNAE3_DEV_SUPPORT_LANE_NUM_B,
        HNAE3_DEV_SUPPORT_WOL_B,
+       HNAE3_DEV_SUPPORT_TM_FLUSH_B,
 };
 
 #define hnae3_ae_dev_fd_supported(ae_dev) \
@@ -172,6 +174,9 @@ enum HNAE3_DEV_CAP_BITS {
 #define hnae3_ae_dev_wol_supported(ae_dev) \
        test_bit(HNAE3_DEV_SUPPORT_WOL_B, (ae_dev)->caps)
 
+#define hnae3_ae_dev_tm_flush_supported(hdev) \
+       test_bit(HNAE3_DEV_SUPPORT_TM_FLUSH_B, (hdev)->ae_dev->caps)
+
 enum HNAE3_PF_CAP_BITS {
        HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0,
 };
@@ -407,7 +412,7 @@ struct hnae3_ae_dev {
        unsigned long hw_err_reset_req;
        struct hnae3_dev_specs dev_specs;
        u32 dev_version;
-       unsigned long caps[BITS_TO_LONGS(HNAE3_DEV_CAPS_MAX_NUM)];
+       DECLARE_BITMAP(caps, HNAE3_DEV_CAPS_MAX_NUM);
        void *priv;
 };
 
index b85c412..dcecb23 100644 (file)
@@ -156,6 +156,7 @@ static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = {
        {HCLGE_COMM_CAP_FEC_STATS_B, HNAE3_DEV_SUPPORT_FEC_STATS_B},
        {HCLGE_COMM_CAP_LANE_NUM_B, HNAE3_DEV_SUPPORT_LANE_NUM_B},
        {HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B},
+       {HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B},
 };
 
 static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = {
@@ -172,6 +173,20 @@ static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = {
 };
 
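+/* Copy the firmware's little-endian capability words into a native bitmap
+ * so that capability bits beyond the first 32 can be tested with test_bit().
+ */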
 static void
+hclge_comm_capability_to_bitmap(unsigned long *bitmap, __le32 *caps)
+{
+       const unsigned int words = HCLGE_COMM_QUERY_CAP_LENGTH;
+       u32 val[HCLGE_COMM_QUERY_CAP_LENGTH];
+       unsigned int i;
+
+       for (i = 0; i < words; i++)
+               val[i] = __le32_to_cpu(caps[i]);
+
+       bitmap_from_arr32(bitmap, val,
+                         HCLGE_COMM_QUERY_CAP_LENGTH * BITS_PER_TYPE(u32));
+}
+
+static void
 hclge_comm_parse_capability(struct hnae3_ae_dev *ae_dev, bool is_pf,
                            struct hclge_comm_query_version_cmd *cmd)
 {
@@ -179,11 +194,12 @@ hclge_comm_parse_capability(struct hnae3_ae_dev *ae_dev, bool is_pf,
                                is_pf ? hclge_pf_cmd_caps : hclge_vf_cmd_caps;
        u32 size = is_pf ? ARRAY_SIZE(hclge_pf_cmd_caps) :
                                ARRAY_SIZE(hclge_vf_cmd_caps);
-       u32 caps, i;
+       DECLARE_BITMAP(caps, HCLGE_COMM_QUERY_CAP_LENGTH * BITS_PER_TYPE(u32));
+       u32 i;
 
-       caps = __le32_to_cpu(cmd->caps[0]);
+       hclge_comm_capability_to_bitmap(caps, cmd->caps);
        for (i = 0; i < size; i++)
-               if (hnae3_get_bit(caps, caps_map[i].imp_bit))
+               if (test_bit(caps_map[i].imp_bit, caps))
                        set_bit(caps_map[i].local_bit, ae_dev->caps);
 }
 
index 18f1b4b..2b7197c 100644 (file)
@@ -153,6 +153,7 @@ enum hclge_opcode_type {
        HCLGE_OPC_TM_INTERNAL_STS       = 0x0850,
        HCLGE_OPC_TM_INTERNAL_CNT       = 0x0851,
        HCLGE_OPC_TM_INTERNAL_STS_1     = 0x0852,
+       HCLGE_OPC_TM_FLUSH              = 0x0872,
 
        /* Packet buffer allocate commands */
        HCLGE_OPC_TX_BUFF_ALLOC         = 0x0901,
@@ -349,6 +350,7 @@ enum HCLGE_COMM_CAP_BITS {
        HCLGE_COMM_CAP_FEC_STATS_B = 25,
        HCLGE_COMM_CAP_LANE_NUM_B = 27,
        HCLGE_COMM_CAP_WOL_B = 28,
+       HCLGE_COMM_CAP_TM_FLUSH_B = 31,
 };
 
 enum HCLGE_COMM_API_CAP_BITS {
index 6546cfe..f276b5e 100644 (file)
@@ -411,6 +411,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
        }, {
                .name = "support wake on lan",
                .cap_bit = HNAE3_DEV_SUPPORT_WOL_B,
+       }, {
+               .name = "support tm flush",
+               .cap_bit = HNAE3_DEV_SUPPORT_TM_FLUSH_B,
        }
 };
 
@@ -461,9 +464,9 @@ static void hns3_dbg_fill_content(char *content, u16 len,
                if (result) {
                        if (item_len < strlen(result[i]))
                                break;
-                       strscpy(pos, result[i], strlen(result[i]));
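+                       /* memcpy, not strscpy: with a length of strlen() the
+                        * latter would NUL-terminate one byte early and drop
+                        * the last character of these manually padded fields.
+                        */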
+                       memcpy(pos, result[i], strlen(result[i]));
                } else {
-                       strscpy(pos, items[i].name, strlen(items[i].name));
+                       memcpy(pos, items[i].name, strlen(items[i].name));
                }
                pos += item_len;
                len -= item_len;
index 9f68900..b7b51e5 100644 (file)
@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running)
        if (!if_running)
                return;
 
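+       /* Own HNS3_NIC_STATE_DOWN so the loopback test cannot race a
+        * concurrent reset bringing the rings up or down.
+        */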
+       if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+               return;
+
        netif_carrier_off(ndev);
        netif_tx_disable(ndev);
 
@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running)
        if (!if_running)
                return;
 
-       hns3_nic_reset_all_ring(priv->ae_handle);
+       if (hns3_nic_resetting(ndev))
+               return;
+
+       if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+               return;
+
+       if (hns3_nic_reset_all_ring(priv->ae_handle))
+               return;
+
+       clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
 
        for (i = 0; i < priv->vector_num; i++)
                hns3_vector_enable(&priv->tqp_vector[i]);
index c4aded6..fad5a5f 100644 (file)
@@ -52,7 +52,10 @@ static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
 
        for (i = 0; i < HNAE3_MAX_TC; i++) {
                ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
-               ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+               if (i < hdev->tm_info.num_tc)
+                       ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+               else
+                       ets->tc_tx_bw[i] = 0;
 
                if (hdev->tm_info.tc_info[i].tc_sch_mode ==
                    HCLGE_SCH_MODE_SP)
@@ -123,7 +126,8 @@ static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets,
 }
 
 static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
-                                      struct ieee_ets *ets, bool *changed)
+                                      struct ieee_ets *ets, bool *changed,
+                                      u8 tc_num)
 {
        bool has_ets_tc = false;
        u32 total_ets_bw = 0;
@@ -137,6 +141,13 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
                                *changed = true;
                        break;
                case IEEE_8021QAZ_TSA_ETS:
+                       if (i >= tc_num) {
+                               dev_err(&hdev->pdev->dev,
+                                       "tc%u is disabled, cannot set ets bw\n",
+                                       i);
+                               return -EINVAL;
+                       }
+
                        /* The hardware will switch to sp mode if bandwidth is
                         * 0, so the ETS bandwidth limit must be greater than 0.
                         */
@@ -176,7 +187,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
        if (ret)
                return ret;
 
-       ret = hclge_ets_sch_mode_validate(hdev, ets, changed);
+       ret = hclge_ets_sch_mode_validate(hdev, ets, changed, tc_num);
        if (ret)
                return ret;
 
@@ -216,6 +227,10 @@ static int hclge_notify_down_uinit(struct hclge_dev *hdev)
        if (ret)
                return ret;
 
+       ret = hclge_tm_flush_cfg(hdev, true);
+       if (ret)
+               return ret;
+
        return hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
 }
 
@@ -227,6 +242,10 @@ static int hclge_notify_init_up(struct hclge_dev *hdev)
        if (ret)
                return ret;
 
+       ret = hclge_tm_flush_cfg(hdev, false);
+       if (ret)
+               return ret;
+
        return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 }
 
@@ -313,6 +332,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
        struct net_device *netdev = h->kinfo.netdev;
        struct hclge_dev *hdev = vport->back;
        u8 i, j, pfc_map, *prio_tc;
+       int last_bad_ret = 0;
        int ret;
 
        if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
@@ -350,13 +370,28 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
        if (ret)
                return ret;
 
-       ret = hclge_buffer_alloc(hdev);
-       if (ret) {
-               hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+       ret = hclge_tm_flush_cfg(hdev, true);
+       if (ret)
                return ret;
-       }
 
-       return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+       /* Whether or not the following operations succeed, disabling
+        * the tm flush and notifying the network stack that the device
+        * is up are both necessary, so do not return immediately.
+        */
+       ret = hclge_buffer_alloc(hdev);
+       if (ret)
+               last_bad_ret = ret;
+
+       ret = hclge_tm_flush_cfg(hdev, false);
+       if (ret)
+               last_bad_ret = ret;
+
+       ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+       if (ret)
+               last_bad_ret = ret;
+
+       return last_bad_ret;
 }
 
 static int hclge_ieee_setapp(struct hnae3_handle *h, struct dcb_app *app)
index 233c132..0fb2eae 100644 (file)
@@ -111,9 +111,9 @@ static void hclge_dbg_fill_content(char *content, u16 len,
                if (result) {
                        if (item_len < strlen(result[i]))
                                break;
-                       strscpy(pos, result[i], strlen(result[i]));
+                       memcpy(pos, result[i], strlen(result[i]));
                } else {
-                       strscpy(pos, items[i].name, strlen(items[i].name));
+                       memcpy(pos, items[i].name, strlen(items[i].name));
                }
                pos += item_len;
                len -= item_len;
@@ -693,8 +693,7 @@ static int hclge_dbg_dump_tc(struct hclge_dev *hdev, char *buf, int len)
        for (i = 0; i < HNAE3_MAX_TC; i++) {
                sch_mode_str = ets_weight->tc_weight[i] ? "dwrr" : "sp";
                pos += scnprintf(buf + pos, len - pos, "%u     %4s    %3u\n",
-                                i, sch_mode_str,
-                                hdev->tm_info.pg_info[0].tc_dwrr[i]);
+                                i, sch_mode_str, ets_weight->tc_weight[i]);
        }
 
        return 0;
index bf675c1..a940e35 100644 (file)
@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev);
 static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
 static void hclge_sync_fd_table(struct hclge_dev *hdev);
 static void hclge_update_fec_stats(struct hclge_dev *hdev);
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+                                     int wait_cnt);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -7558,6 +7560,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
 
 static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
 {
+#define HCLGE_LINK_STATUS_WAIT_CNT  3
+
        struct hclge_desc desc;
        struct hclge_config_mac_mode_cmd *req =
                (struct hclge_config_mac_mode_cmd *)desc.data;
@@ -7582,9 +7586,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
        req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
        ret = hclge_cmd_send(&hdev->hw, &desc, 1);
-       if (ret)
+       if (ret) {
                dev_err(&hdev->pdev->dev,
                        "mac enable fail, ret =%d.\n", ret);
+               return;
+       }
+
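+       /* When disabling the MAC, wait for the link to actually report
+        * down so that later reconfiguration does not race the hardware.
+        */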
+       if (!enable)
+               hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
+                                          HCLGE_LINK_STATUS_WAIT_CNT);
 }
 
 static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
@@ -7647,10 +7657,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
        } while (++i < HCLGE_PHY_LINK_STATUS_NUM);
 }
 
-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+                                     int wait_cnt)
 {
-#define HCLGE_MAC_LINK_STATUS_NUM  100
-
        int link_status;
        int i = 0;
        int ret;
@@ -7663,13 +7672,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
                        return 0;
 
                msleep(HCLGE_LINK_STATUS_MS);
-       } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
+       } while (++i < wait_cnt);
        return -EBUSY;
 }
 
 static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
                                          bool is_phy)
 {
+#define HCLGE_MAC_LINK_STATUS_NUM  100
+
        int link_ret;
 
        link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
@@ -7677,7 +7688,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
        if (is_phy)
                hclge_phy_link_status_wait(hdev, link_ret);
 
-       return hclge_mac_link_status_wait(hdev, link_ret);
+       return hclge_mac_link_status_wait(hdev, link_ret,
+                                         HCLGE_MAC_LINK_STATUS_NUM);
 }
 
 static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
@@ -10915,9 +10927,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
        u32 rx_pause, tx_pause;
        u8 flowctl;
 
-       if (!phydev->link || !phydev->autoneg)
+       if (!phydev->link)
                return 0;
 
+       if (!phydev->autoneg)
+               return hclge_mac_pause_setup_hw(hdev);
+
        local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
 
        if (phydev->pause)
index 922c0da..c58c312 100644 (file)
@@ -785,6 +785,7 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
 static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
 {
 #define BW_PERCENT     100
+#define DEFAULT_BW_WEIGHT      1
 
        u8 i;
 
@@ -806,7 +807,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
                for (k = 0; k < hdev->tm_info.num_tc; k++)
                        hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT;
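+               /* Keep a minimal non-zero weight for disabled TCs: a zero
+                * bandwidth would switch the hardware into SP mode (see the
+                * comment in hclge_ets_sch_mode_validate()).
+                */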
                for (; k < HNAE3_MAX_TC; k++)
-                       hdev->tm_info.pg_info[i].tc_dwrr[k] = 0;
+                       hdev->tm_info.pg_info[i].tc_dwrr[k] = DEFAULT_BW_WEIGHT;
        }
 }
 
@@ -1484,7 +1485,11 @@ int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
                return ret;
 
        /* Cfg schd mode for each level schd */
-       return hclge_tm_schd_mode_hw(hdev);
+       ret = hclge_tm_schd_mode_hw(hdev);
+       if (ret)
+               return ret;
+
+       return hclge_tm_flush_cfg(hdev, false);
 }
 
 static int hclge_pause_param_setup_hw(struct hclge_dev *hdev)
@@ -1548,7 +1553,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
        return 0;
 }
 
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
 {
        bool tx_en, rx_en;
 
@@ -2113,3 +2118,28 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
 
        return 0;
 }
+
+int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable)
+{
+       struct hclge_desc desc;
+       int ret;
+
+       if (!hnae3_ae_dev_tm_flush_supported(hdev))
+               return 0;
+
+       hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_FLUSH, false);
+
+       desc.data[0] = cpu_to_le32(enable ? HCLGE_TM_FLUSH_EN_MSK : 0);
+
+       ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+       if (ret) {
+               dev_err(&hdev->pdev->dev,
+                       "failed to config tm flush, ret = %d\n", ret);
+               return ret;
+       }
+
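+       /* Give the hardware time to drain in-flight packets once the
+        * flush is enabled.
+        */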
+       if (enable)
+               msleep(HCLGE_TM_FLUSH_TIME_MS);
+
+       return ret;
+}
index dd6f1fd..53eec6d 100644 (file)
@@ -33,6 +33,9 @@ enum hclge_opcode_type;
 #define HCLGE_DSCP_MAP_TC_BD_NUM       2
 #define HCLGE_DSCP_TC_SHIFT(n)         (((n) & 1) * 4)
 
+#define HCLGE_TM_FLUSH_TIME_MS 10
+#define HCLGE_TM_FLUSH_EN_MSK  BIT(0)
+
 struct hclge_pg_to_pri_link_cmd {
        u8 pg_id;
        u8 rsvd1[3];
@@ -242,6 +245,7 @@ int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
                           u8 pfc_bitmap);
 int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
 int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
 void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
 void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
 int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
@@ -272,4 +276,5 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
                             struct hclge_tm_shaper_para *para);
 int hclge_up_to_tc_map(struct hclge_dev *hdev);
 int hclge_dscp_to_tc_map(struct hclge_dev *hdev);
+int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable);
 #endif
index 113fcb3..832a2ae 100644 (file)
@@ -203,7 +203,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
        unsigned long offset;
 
        for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
-               asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
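+               /* dcbf with the L=1 operand is the architected equivalent of
+                * the dcbfl extended mnemonic, which not every assembler
+                * accepts.
+                */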
+               asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
 }
 
 /* replenish the buffers for a pool.  note that we don't need to
index 763d613..df76cda 100644 (file)
@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *,
 static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
                                        struct ibmvnic_sub_crq_queue *);
 static int ibmvnic_poll(struct napi_struct *napi, int data);
+static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
 static void send_query_map(struct ibmvnic_adapter *adapter);
 static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
 static int send_request_unmap(struct ibmvnic_adapter *, u8);
@@ -114,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
                                struct ibmvnic_long_term_buff *ltb);
 static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+static void flush_reset_queue(struct ibmvnic_adapter *adapter);
 
 struct ibmvnic_stat {
        char name[ETH_GSTRING_LEN];
@@ -1505,8 +1508,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
 
 static int ibmvnic_login(struct net_device *netdev)
 {
+       unsigned long flags, timeout = msecs_to_jiffies(20000);
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-       unsigned long timeout = msecs_to_jiffies(20000);
        int retry_count = 0;
        int retries = 10;
        bool retry;
@@ -1527,11 +1530,9 @@ static int ibmvnic_login(struct net_device *netdev)
 
                if (!wait_for_completion_timeout(&adapter->init_done,
                                                 timeout)) {
-                       netdev_warn(netdev, "Login timed out, retrying...\n");
-                       retry = true;
-                       adapter->init_done_rc = 0;
-                       retry_count++;
-                       continue;
+                       netdev_warn(netdev, "Login timed out\n");
+                       adapter->login_pending = false;
+                       goto partial_reset;
                }
 
                if (adapter->init_done_rc == ABORTED) {
@@ -1573,10 +1574,69 @@ static int ibmvnic_login(struct net_device *netdev)
                                            "SCRQ irq initialization failed\n");
                                return rc;
                        }
+               /* Default/timeout error handling: reset and start fresh */
                } else if (adapter->init_done_rc) {
                        netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
                                    adapter->init_done_rc);
-                       return -EIO;
+
+partial_reset:
+                       /* The adapter login failed, so free any CRQs or
+                        * sub-CRQs and register again before attempting to
+                        * log in again. If we don't, the VIOS may think we
+                        * are already logged in and reject any subsequent
+                        * attempts.
+                        */
+                       netdev_warn(netdev,
+                                   "Freeing and re-registering CRQs before attempting to log in again\n");
+                       retry = true;
+                       adapter->init_done_rc = 0;
+                       release_sub_crqs(adapter, true);
+                       /* Much of this is similar logic to ibmvnic_probe():
+                        * we are essentially re-initializing communication
+                        * with the server. We should not run any other
+                        * resets/failovers here because this is already a
+                        * form of reset and we do not want parallel resets
+                        * occurring.
+                        */
+                       do {
+                               reinit_init_done(adapter);
+                               /* Clear any failovers we got in the previous
+                                * pass since we are re-initializing the CRQ
+                                */
+                               adapter->failover_pending = false;
+                               release_crq_queue(adapter);
+                               /* If we don't sleep here then we risk an
+                                * unnecessary failover event from the VIOS.
+                                * This is a known VIOS issue caused by a vnic
+                                * device freeing and registering a CRQ too
+                                * quickly.
+                                */
+                               msleep(1500);
+                               /* Avoid any resets, since we are currently
+                                * resetting.
+                                */
+                               spin_lock_irqsave(&adapter->rwi_lock, flags);
+                               flush_reset_queue(adapter);
+                               spin_unlock_irqrestore(&adapter->rwi_lock,
+                                                      flags);
+
+                               rc = init_crq_queue(adapter);
+                               if (rc) {
+                                       netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+                                                  rc);
+                                       return -EIO;
+                               }
+
+                               rc = ibmvnic_reset_init(adapter, false);
+                               if (rc)
+                                       netdev_err(netdev, "login recovery: Reset init failed %d\n",
+                                                  rc);
+                               /* ibmvnic_reset_init() returns -EAGAIN if the
+                                * IBMVNIC_CRQ_INIT message fails. Since it
+                                * frees the irqs on failure, we won't be able
+                                * to receive new CRQs, so keep trying. probe()
+                                * handles this similarly.
+                                */
+                       } while (rc == -EAGAIN && retry_count++ < retries);
                }
        } while (retry);
 
@@ -1588,12 +1648,22 @@ static int ibmvnic_login(struct net_device *netdev)
 
 static void release_login_buffer(struct ibmvnic_adapter *adapter)
 {
+       if (!adapter->login_buf)
+               return;
+
+       dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+                        adapter->login_buf_sz, DMA_TO_DEVICE);
        kfree(adapter->login_buf);
        adapter->login_buf = NULL;
 }
 
 static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
 {
+       if (!adapter->login_rsp_buf)
+               return;
+
+       dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
        kfree(adapter->login_rsp_buf);
        adapter->login_rsp_buf = NULL;
 }
@@ -4830,11 +4900,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
        if (rc) {
                adapter->login_pending = false;
                netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
-               goto buf_rsp_map_failed;
+               goto buf_send_failed;
        }
 
        return 0;
 
+buf_send_failed:
+       dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
+                        DMA_FROM_DEVICE);
 buf_rsp_map_failed:
        kfree(login_rsp_buffer);
        adapter->login_rsp_buf = NULL;
@@ -5396,6 +5469,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        int num_tx_pools;
        int num_rx_pools;
        u64 *size_array;
+       u32 rsp_len;
        int i;
 
        /* CHECK: Test/set of login_pending does not need to be atomic
@@ -5407,11 +5481,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
        }
        adapter->login_pending = false;
 
-       dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
-                        DMA_TO_DEVICE);
-       dma_unmap_single(dev, adapter->login_rsp_buf_token,
-                        adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
-
        /* If the number of queues requested can't be allocated by the
         * server, the login response will return with code 1. We will need
         * to resend the login buffer with fewer queues requested.
@@ -5447,6 +5516,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
                ibmvnic_reset(adapter, VNIC_RESET_FATAL);
                return -EIO;
        }
+
+       rsp_len = be32_to_cpu(login_rsp->len);
+       if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
+           rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
+           rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
+               /* This can happen if a login request times out and there are
+                * 2 outstanding login requests sent: the LOGIN_RSP crq could
+                * have been for the older request, so the response buffer we
+                * are parsing may still be incomplete.
+                */
+               dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
+               ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+               return -EIO;
+       }
+
        size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
                be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
        /* variable buffer sizes are not supported, so just read the
index 9954493..62497f5 100644 (file)
@@ -1839,7 +1839,7 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf)
 void i40e_dbg_init(void)
 {
        i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL);
-       if (!i40e_dbg_root)
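+       /* debugfs_create_dir() returns an ERR_PTR on failure, never NULL */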
+       if (IS_ERR(i40e_dbg_root))
                pr_info("init of debugfs failed\n");
 }
 
index 29ad179..a86bfa3 100644 (file)
@@ -2609,7 +2609,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
                        retval = i40e_correct_mac_vlan_filters
                                (vsi, &tmp_add_list, &tmp_del_list,
                                 vlan_filters);
-               else
+               else if (pf->vf)
                        retval = i40e_correct_vf_mac_vlan_filters
                                (vsi, &tmp_add_list, &tmp_del_list,
                                 vlan_filters, pf->vf[vsi->vf_id].trusted);
@@ -2782,7 +2782,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
        }
 
        /* if the VF is not trusted do not do promisc */
-       if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) {
+       if (vsi->type == I40E_VSI_SRIOV && pf->vf &&
+           !pf->vf[vsi->vf_id].trusted) {
                clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
                goto out;
        }
index 9da0c87..f99c1f7 100644 (file)
@@ -210,11 +210,11 @@ read_nvm_exit:
  * @hw: pointer to the HW structure.
  * @module_pointer: module pointer location in words from the NVM beginning
  * @offset: offset in words from module start
- * @words: number of words to write
- * @data: buffer with words to write to the Shadow RAM
+ * @words: number of words to read
+ * @data: buffer for the words read from the Shadow RAM
  * @last_command: tells the AdminQ that this is the last command
  *
- * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ * Reads a 16 bit word buffer from the Shadow RAM using the admin command.
  **/
 static int i40e_read_nvm_aq(struct i40e_hw *hw,
                            u8 module_pointer, u32 offset,
@@ -234,18 +234,18 @@ static int i40e_read_nvm_aq(struct i40e_hw *hw,
         */
        if ((offset + words) > hw->nvm.sr_size)
                i40e_debug(hw, I40E_DEBUG_NVM,
-                          "NVM write error: offset %d beyond Shadow RAM limit %d\n",
+                          "NVM read error: offset %d beyond Shadow RAM limit %d\n",
                           (offset + words), hw->nvm.sr_size);
        else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
-               /* We can write only up to 4KB (one sector), in one AQ write */
+               /* We can read only up to 4KB (one sector) in one AQ read */
                i40e_debug(hw, I40E_DEBUG_NVM,
-                          "NVM write fail error: tried to write %d words, limit is %d.\n",
+                          "NVM read fail error: tried to read %d words, limit is %d.\n",
                           words, I40E_SR_SECTOR_SIZE_IN_WORDS);
        else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
                 != (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
-               /* A single write cannot spread over two sectors */
+               /* A single read cannot spread over two sectors */
                i40e_debug(hw, I40E_DEBUG_NVM,
-                          "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n",
+                          "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n",
                           offset, words);
        else
                ret_code = i40e_aq_read_nvm(hw, module_pointer,
index f80f273..8cbdebc 100644 (file)
@@ -255,8 +255,10 @@ struct iavf_adapter {
        struct workqueue_struct *wq;
        struct work_struct reset_task;
        struct work_struct adminq_task;
+       struct work_struct finish_config;
        struct delayed_work client_task;
        wait_queue_head_t down_waitqueue;
+       wait_queue_head_t reset_waitqueue;
        wait_queue_head_t vc_waitqueue;
        struct iavf_q_vector *q_vectors;
        struct list_head vlan_filter_list;
@@ -518,8 +520,9 @@ int iavf_up(struct iavf_adapter *adapter);
 void iavf_down(struct iavf_adapter *adapter);
 int iavf_process_config(struct iavf_adapter *adapter);
 int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
-void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags);
 void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+void iavf_schedule_finish_config(struct iavf_adapter *adapter);
 void iavf_reset(struct iavf_adapter *adapter);
 void iavf_set_ethtool_ops(struct net_device *netdev);
 void iavf_update_stats(struct iavf_adapter *adapter);
@@ -582,4 +585,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
 void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
 struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
                                        const u8 *macaddr);
+int iavf_wait_for_reset(struct iavf_adapter *adapter);
 #endif /* _IAVF_H_ */
index 6f171d1..a34303a 100644 (file)
@@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
        u32 orig_flags, new_flags, changed_flags;
+       int ret = 0;
        u32 i;
 
        orig_flags = READ_ONCE(adapter->flags);
@@ -531,12 +532,14 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
        /* issue a reset to force legacy-rx change to take effect */
        if (changed_flags & IAVF_FLAG_LEGACY_RX) {
                if (netif_running(netdev)) {
-                       adapter->flags |= IAVF_FLAG_RESET_NEEDED;
-                       queue_work(adapter->wq, &adapter->reset_task);
+                       iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+                       ret = iavf_wait_for_reset(adapter);
+                       if (ret)
+                               netdev_warn(netdev, "Changing private flags: timed out or interrupted waiting for reset");
                }
        }
 
-       return 0;
+       return ret;
 }
 
 /**
@@ -627,6 +630,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
        u32 new_rx_count, new_tx_count;
+       int ret = 0;
 
        if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
                return -EINVAL;
@@ -671,11 +675,13 @@ static int iavf_set_ringparam(struct net_device *netdev,
        }
 
        if (netif_running(netdev)) {
-               adapter->flags |= IAVF_FLAG_RESET_NEEDED;
-               queue_work(adapter->wq, &adapter->reset_task);
+               iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+               ret = iavf_wait_for_reset(adapter);
+               if (ret)
+                       netdev_warn(netdev, "Changing ring parameters: timed out or interrupted waiting for reset");
        }
 
-       return 0;
+       return ret;
 }
 
 /**
@@ -1283,6 +1289,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
                fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
                fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
+               fltr->ip_ver = 4;
                break;
        case AH_V4_FLOW:
        case ESP_V4_FLOW:
@@ -1294,6 +1301,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
                fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
                fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
+               fltr->ip_ver = 4;
                break;
        case IPV4_USER_FLOW:
                fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
@@ -1306,6 +1314,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
                fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
                fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
+               fltr->ip_ver = 4;
                break;
        case TCP_V6_FLOW:
        case UDP_V6_FLOW:
@@ -1324,6 +1333,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
                fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
                fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
+               fltr->ip_ver = 6;
                break;
        case AH_V6_FLOW:
        case ESP_V6_FLOW:
@@ -1339,6 +1349,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                       sizeof(struct in6_addr));
                fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
                fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
+               fltr->ip_ver = 6;
                break;
        case IPV6_USER_FLOW:
                memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
@@ -1355,6 +1366,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
                fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
                fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+               fltr->ip_ver = 6;
                break;
        case ETHER_FLOW:
                fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
@@ -1365,6 +1377,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
                return -EINVAL;
        }
 
+       err = iavf_validate_fdir_fltr_masks(adapter, fltr);
+       if (err)
+               return err;
+
        if (iavf_fdir_is_dup_fltr(adapter, fltr))
                return -EEXIST;
 
@@ -1395,14 +1411,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
        if (fsp->flow_type & FLOW_MAC_EXT)
                return -EINVAL;
 
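+       /* Take the lock before checking the filter count so the check and
+        * the insertion below cannot race another writer.
+        */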
+       spin_lock_bh(&adapter->fdir_fltr_lock);
        if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
                dev_err(&adapter->pdev->dev,
                        "Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
                        IAVF_MAX_FDIR_FILTERS);
                return -ENOSPC;
        }
 
-       spin_lock_bh(&adapter->fdir_fltr_lock);
        if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
                dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
                spin_unlock_bh(&adapter->fdir_fltr_lock);
@@ -1775,7 +1792,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
        case ETHTOOL_GRXCLSRLCNT:
                if (!FDIR_FLTR_SUPPORT(adapter))
                        break;
+               spin_lock_bh(&adapter->fdir_fltr_lock);
                cmd->rule_cnt = adapter->fdir_active_fltr;
+               spin_unlock_bh(&adapter->fdir_fltr_lock);
                cmd->data = IAVF_MAX_FDIR_FILTERS;
                ret = 0;
                break;
@@ -1830,7 +1849,7 @@ static int iavf_set_channels(struct net_device *netdev,
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
        u32 num_req = ch->combined_count;
-       int i;
+       int ret = 0;
 
        if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
            adapter->num_tc) {
@@ -1852,22 +1871,13 @@ static int iavf_set_channels(struct net_device *netdev,
 
        adapter->num_req_queues = num_req;
        adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
-       iavf_schedule_reset(adapter);
+       iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
 
-       /* wait for the reset is done */
-       for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
-               msleep(IAVF_RESET_WAIT_MS);
-               if (adapter->flags & IAVF_FLAG_RESET_PENDING)
-                       continue;
-               break;
-       }
-       if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
-               adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
-               adapter->num_active_queues = num_req;
-               return -EOPNOTSUPP;
-       }
+       ret = iavf_wait_for_reset(adapter);
+       if (ret)
+               netdev_warn(netdev, "Changing channel count: timed out or interrupted waiting for reset");
 
-       return 0;
+       return ret;
 }
 
 /**
index 6146203..03e774b 100644 (file)
@@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = {
        }
 };
 
+static const struct in6_addr ipv6_addr_zero_mask = {
+       .in6_u = {
+               .u6_addr8 = {
+                       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+               }
+       }
+};
+
+/**
+ * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if all masks of packet fields are either full or empty. Returns
+ * error on at least one partial mask.
+ */
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+                                 struct iavf_fdir_fltr *fltr)
+{
+       if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_ver == 4) {
+               if (fltr->ip_mask.v4_addrs.src_ip &&
+                   fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX))
+                       goto partial_mask;
+
+               if (fltr->ip_mask.v4_addrs.dst_ip &&
+                   fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX))
+                       goto partial_mask;
+
+               if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX)
+                       goto partial_mask;
+       } else if (fltr->ip_ver == 6) {
+               if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask,
+                          sizeof(struct in6_addr)) &&
+                   memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+                          sizeof(struct in6_addr)))
+                       goto partial_mask;
+
+               if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask,
+                          sizeof(struct in6_addr)) &&
+                   memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+                          sizeof(struct in6_addr)))
+                       goto partial_mask;
+
+               if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX)
+                       goto partial_mask;
+       }
+
+       if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX)
+               goto partial_mask;
+
+       if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX))
+               goto partial_mask;
+
+       if (fltr->ip_mask.l4_header &&
+           fltr->ip_mask.l4_header != htonl(U32_MAX))
+               goto partial_mask;
+
+       return 0;
+
+partial_mask:
+       dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n");
+       return -EOPNOTSUPP;
+}
+
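
The rule this helper enforces is all-or-nothing per field: a mask must select either the whole field or none of it. A minimal stand-alone sketch of that predicate for one 32-bit field (the helper name and the userspace harness are illustrative, not part of the patch):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>   /* htonl() */

/* All-zero means "ignore the field", all-one means "match exactly";
 * anything in between is a partial mask and is rejected above.
 */
static int mask_is_full_or_empty(uint32_t mask_be)
{
	return mask_be == 0 || mask_be == htonl(UINT32_MAX);
}

int main(void)
{
	printf("%d\n", mask_is_full_or_empty(0));                 /* 1 */
	printf("%d\n", mask_is_full_or_empty(htonl(UINT32_MAX))); /* 1 */
	printf("%d\n", mask_is_full_or_empty(htonl(0xffffff00))); /* 0: partial */
	return 0;
}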
 /**
  * iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
  * @fltr: Flow Director filter data structure
@@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
                VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
        }
 
-       fltr->ip_ver = 4;
-
        return 0;
 }
 
@@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
                VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
        }
 
-       fltr->ip_ver = 6;
-
        return 0;
 }
 
@@ -722,7 +791,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
 bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
 {
        struct iavf_fdir_fltr *tmp;
+       bool ret = false;
 
+       spin_lock_bh(&adapter->fdir_fltr_lock);
        list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
                if (tmp->flow_type != fltr->flow_type)
                        continue;
@@ -732,11 +803,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
                    !memcmp(&tmp->ip_data, &fltr->ip_data,
                            sizeof(fltr->ip_data)) &&
                    !memcmp(&tmp->ext_data, &fltr->ext_data,
-                           sizeof(fltr->ext_data)))
-                       return true;
+                           sizeof(fltr->ext_data))) {
+                       ret = true;
+                       break;
+               }
        }
+       spin_unlock_bh(&adapter->fdir_fltr_lock);
 
-       return false;
+       return ret;
 }
 
 /**
index 33c55c3..9eb9f73 100644
@@ -110,6 +110,8 @@ struct iavf_fdir_fltr {
        struct virtchnl_fdir_add vc_add_msg;
 };
 
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+                                 struct iavf_fdir_fltr *fltr);
 int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
 void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
 bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
index a483eb1..9610ca7 100644
@@ -167,6 +167,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev)
 }
 
 /**
+ * iavf_is_reset_in_progress - Check if a reset is in progress
+ * @adapter: board private structure
+ */
+static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter)
+{
+       if (adapter->state == __IAVF_RESETTING ||
+           adapter->flags & (IAVF_FLAG_RESET_PENDING |
+                             IAVF_FLAG_RESET_NEEDED))
+               return true;
+
+       return false;
+}
+
+/**
+ * iavf_wait_for_reset - Wait for reset to finish.
+ * @adapter: board private structure
+ *
+ * Returns 0 if reset finished successfully, negative on timeout or if
+ * interrupted by a signal.
+ */
+int iavf_wait_for_reset(struct iavf_adapter *adapter)
+{
+       int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
+                                       !iavf_is_reset_in_progress(adapter),
+                                       msecs_to_jiffies(5000));
+
+       /* ret < 0  - wait was interrupted by a signal
+        * ret == 0 - timed out waiting for the reset to finish
+        * ret > 0  - reset finished
+        */
+       if (ret > 0)
+               return 0;
+       else if (ret < 0)
+               return -EINTR;
+       else
+               return -EBUSY;
+}
+
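
Callers that must block until the reset completes pair this helper with iavf_schedule_reset(); a minimal sketch of the intended calling convention, mirroring the MTU and channel-count paths later in this diff:

	/* Schedule the reset, then sleep on reset_waitqueue until the
	 * reset task wakes us, a signal arrives, or the 5 s timeout hits.
	 */
	iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
	err = iavf_wait_for_reset(adapter);
	if (err)
		netdev_warn(netdev, "timed out or was interrupted waiting for reset\n");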
+/**
  * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
  * @hw:   pointer to the HW structure
  * @mem:  ptr to mem struct to fill out
@@ -262,12 +301,14 @@ static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
 /**
  * iavf_schedule_reset - Set the flags and schedule a reset event
  * @adapter: board private structure
+ * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED
  **/
-void iavf_schedule_reset(struct iavf_adapter *adapter)
+void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags)
 {
-       if (!(adapter->flags &
-             (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
-               adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+       if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
+           !(adapter->flags &
+           (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
+               adapter->flags |= flags;
                queue_work(adapter->wq, &adapter->reset_task);
        }
 }
@@ -295,7 +336,7 @@ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
        struct iavf_adapter *adapter = netdev_priv(netdev);
 
        adapter->tx_timeout_count++;
-       iavf_schedule_reset(adapter);
+       iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
 }
 
 /**
@@ -1651,10 +1692,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
                adapter->msix_entries[vector].entry = vector;
 
        err = iavf_acquire_msix_vectors(adapter, v_budget);
+       if (!err)
+               iavf_schedule_finish_config(adapter);
 
 out:
-       netif_set_real_num_rx_queues(adapter->netdev, pairs);
-       netif_set_real_num_tx_queues(adapter->netdev, pairs);
        return err;
 }
 
@@ -1828,19 +1869,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
 static void iavf_free_q_vectors(struct iavf_adapter *adapter)
 {
        int q_idx, num_q_vectors;
-       int napi_vectors;
 
        if (!adapter->q_vectors)
                return;
 
        num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
-       napi_vectors = adapter->num_active_queues;
 
        for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
                struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
 
-               if (q_idx < napi_vectors)
-                       netif_napi_del(&q_vector->napi);
+               netif_napi_del(&q_vector->napi);
        }
        kfree(adapter->q_vectors);
        adapter->q_vectors = NULL;
@@ -1877,9 +1915,7 @@ static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
                goto err_alloc_queues;
        }
 
-       rtnl_lock();
        err = iavf_set_interrupt_capability(adapter);
-       rtnl_unlock();
        if (err) {
                dev_err(&adapter->pdev->dev,
                        "Unable to setup interrupt capabilities\n");
@@ -1932,15 +1968,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter)
 /**
  * iavf_reinit_interrupt_scheme - Reallocate queues and vectors
  * @adapter: board private structure
+ * @running: true if adapter->state == __IAVF_RUNNING
  *
  * Returns 0 on success, negative on failure
  **/
-static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
+static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running)
 {
        struct net_device *netdev = adapter->netdev;
        int err;
 
-       if (netif_running(netdev))
+       if (running)
                iavf_free_traffic_irqs(adapter);
        iavf_free_misc_irq(adapter);
        iavf_reset_interrupt_capability(adapter);
@@ -1965,6 +2002,78 @@ err:
 }
 
 /**
+ * iavf_finish_config - do all netdev work that needs RTNL
+ * @work: our work_struct
+ *
+ * Do work that needs both RTNL and crit_lock.
+ **/
+static void iavf_finish_config(struct work_struct *work)
+{
+       struct iavf_adapter *adapter;
+       int pairs, err;
+
+       adapter = container_of(work, struct iavf_adapter, finish_config);
+
+       /* Always take RTNL first to prevent circular lock dependency */
+       rtnl_lock();
+       mutex_lock(&adapter->crit_lock);
+
+       if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+           adapter->netdev_registered &&
+           !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
+               netdev_update_features(adapter->netdev);
+               adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+       }
+
+       switch (adapter->state) {
+       case __IAVF_DOWN:
+               if (!adapter->netdev_registered) {
+                       err = register_netdevice(adapter->netdev);
+                       if (err) {
+                               dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
+                                       err);
+
+                               /* go back and try again. */
+                               iavf_free_rss(adapter);
+                               iavf_free_misc_irq(adapter);
+                               iavf_reset_interrupt_capability(adapter);
+                               iavf_change_state(adapter,
+                                                 __IAVF_INIT_CONFIG_ADAPTER);
+                               goto out;
+                       }
+                       adapter->netdev_registered = true;
+               }
+
+               /* Set the real number of queues when reset occurs while
+                * state == __IAVF_DOWN
+                */
+               fallthrough;
+       case __IAVF_RUNNING:
+               pairs = adapter->num_active_queues;
+               netif_set_real_num_rx_queues(adapter->netdev, pairs);
+               netif_set_real_num_tx_queues(adapter->netdev, pairs);
+               break;
+
+       default:
+               break;
+       }
+
+out:
+       mutex_unlock(&adapter->crit_lock);
+       rtnl_unlock();
+}
+
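The "RTNL first" comment above is load-bearing; a sketch of the inverted ordering this worker would deadlock against (illustrative only, not code from the patch):

	/* Anti-pattern elsewhere in the driver:
	 *
	 *   this worker:                offending path:
	 *     rtnl_lock();                mutex_lock(&adapter->crit_lock);
	 *     mutex_lock(&crit_lock);     rtnl_lock();
	 *
	 * Each side holds the lock the other wants: a classic AB-BA
	 * deadlock. Any path needing both locks must take RTNL first.
	 */
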
+/**
+ * iavf_schedule_finish_config - Schedule the finish_config work to run
+ * @adapter: board private structure
+ **/
+void iavf_schedule_finish_config(struct iavf_adapter *adapter)
+{
+       if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+               queue_work(adapter->wq, &adapter->finish_config);
+}
+
+/**
  * iavf_process_aq_command - process aq_required flags
  * and sends aq command
  * @adapter: pointer to iavf adapter structure
@@ -2371,7 +2480,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
                        adapter->vsi_res->num_queue_pairs);
                adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
                adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
-               iavf_schedule_reset(adapter);
+               iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
 
                return -EAGAIN;
        }
@@ -2601,22 +2710,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
 
        netif_carrier_off(netdev);
        adapter->link_up = false;
-
-       /* set the semaphore to prevent any callbacks after device registration
-        * up to time when state of driver will be set to __IAVF_DOWN
-        */
-       rtnl_lock();
-       if (!adapter->netdev_registered) {
-               err = register_netdevice(netdev);
-               if (err) {
-                       rtnl_unlock();
-                       goto err_register;
-               }
-       }
-
-       adapter->netdev_registered = true;
-
        netif_tx_stop_all_queues(netdev);
+
        if (CLIENT_ALLOWED(adapter)) {
                err = iavf_lan_add_device(adapter);
                if (err)
@@ -2629,7 +2724,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
 
        iavf_change_state(adapter, __IAVF_DOWN);
        set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
-       rtnl_unlock();
 
        iavf_misc_irq_enable(adapter);
        wake_up(&adapter->down_waitqueue);
@@ -2649,10 +2743,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
                /* request initial VLAN offload settings */
                iavf_set_vlan_offload_features(adapter, 0, netdev->features);
 
+       iavf_schedule_finish_config(adapter);
        return;
+
 err_mem:
        iavf_free_rss(adapter);
-err_register:
        iavf_free_misc_irq(adapter);
 err_sw_init:
        iavf_reset_interrupt_capability(adapter);
@@ -2679,26 +2774,9 @@ static void iavf_watchdog_task(struct work_struct *work)
                goto restart_watchdog;
        }
 
-       if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
-           adapter->netdev_registered &&
-           !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
-           rtnl_trylock()) {
-               netdev_update_features(adapter->netdev);
-               rtnl_unlock();
-               adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
-       }
-
        if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
                iavf_change_state(adapter, __IAVF_COMM_FAILED);
 
-       if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
-               adapter->aq_required = 0;
-               adapter->current_op = VIRTCHNL_OP_UNKNOWN;
-               mutex_unlock(&adapter->crit_lock);
-               queue_work(adapter->wq, &adapter->reset_task);
-               return;
-       }
-
        switch (adapter->state) {
        case __IAVF_STARTUP:
                iavf_startup(adapter);
@@ -2826,11 +2904,10 @@ static void iavf_watchdog_task(struct work_struct *work)
        /* check for hw reset */
        reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
        if (!reg_val) {
-               adapter->flags |= IAVF_FLAG_RESET_PENDING;
                adapter->aq_required = 0;
                adapter->current_op = VIRTCHNL_OP_UNKNOWN;
                dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
-               queue_work(adapter->wq, &adapter->reset_task);
+               iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
                mutex_unlock(&adapter->crit_lock);
                queue_delayed_work(adapter->wq,
                                   &adapter->watchdog_task, HZ * 2);
@@ -2940,11 +3017,6 @@ static void iavf_reset_task(struct work_struct *work)
        int i = 0, err;
        bool running;
 
-       /* Detach interface to avoid subsequent NDO callbacks */
-       rtnl_lock();
-       netif_device_detach(netdev);
-       rtnl_unlock();
-
        /* When the device is being removed it doesn't make sense to run the
         * reset task; just return in that case.
         */
@@ -2952,7 +3024,7 @@ static void iavf_reset_task(struct work_struct *work)
                if (adapter->state != __IAVF_REMOVE)
                        queue_work(adapter->wq, &adapter->reset_task);
 
-               goto reset_finish;
+               return;
        }
 
        while (!mutex_trylock(&adapter->client_lock))
@@ -3010,11 +3082,6 @@ static void iavf_reset_task(struct work_struct *work)
                iavf_disable_vf(adapter);
                mutex_unlock(&adapter->client_lock);
                mutex_unlock(&adapter->crit_lock);
-               if (netif_running(netdev)) {
-                       rtnl_lock();
-                       dev_close(netdev);
-                       rtnl_unlock();
-               }
                return; /* Do not attempt to reinit. It's dead, Jim. */
        }
 
@@ -3056,7 +3123,7 @@ continue_reset:
 
        if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
            (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
-               err = iavf_reinit_interrupt_scheme(adapter);
+               err = iavf_reinit_interrupt_scheme(adapter, running);
                if (err)
                        goto reset_err;
        }
@@ -3151,10 +3218,11 @@ continue_reset:
 
        adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
 
+       wake_up(&adapter->reset_waitqueue);
        mutex_unlock(&adapter->client_lock);
        mutex_unlock(&adapter->crit_lock);
 
-       goto reset_finish;
+       return;
 reset_err:
        if (running) {
                set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3164,21 +3232,7 @@ reset_err:
 
        mutex_unlock(&adapter->client_lock);
        mutex_unlock(&adapter->crit_lock);
-
-       if (netif_running(netdev)) {
-               /* Close device to ensure that Tx queues will not be started
-                * during netif_device_attach() at the end of the reset task.
-                */
-               rtnl_lock();
-               dev_close(netdev);
-               rtnl_unlock();
-       }
-
        dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-reset_finish:
-       rtnl_lock();
-       netif_device_attach(netdev);
-       rtnl_unlock();
 }
 
 /**
@@ -3196,9 +3250,6 @@ static void iavf_adminq_task(struct work_struct *work)
        u32 val, oldval;
        u16 pending;
 
-       if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
-               goto out;
-
        if (!mutex_trylock(&adapter->crit_lock)) {
                if (adapter->state == __IAVF_REMOVE)
                        return;
@@ -3207,10 +3258,13 @@ static void iavf_adminq_task(struct work_struct *work)
                goto out;
        }
 
+       if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+               goto unlock;
+
        event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
        event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
        if (!event.msg_buf)
-               goto out;
+               goto unlock;
 
        do {
                ret = iavf_clean_arq_element(hw, &event, &pending);
@@ -3225,11 +3279,8 @@ static void iavf_adminq_task(struct work_struct *work)
                if (pending != 0)
                        memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
        } while (pending);
-       mutex_unlock(&adapter->crit_lock);
 
-       if ((adapter->flags &
-            (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
-           adapter->state == __IAVF_RESETTING)
+       if (iavf_is_reset_in_progress(adapter))
                goto freedom;
 
        /* check for error indications */
@@ -3271,6 +3322,8 @@ static void iavf_adminq_task(struct work_struct *work)
 
 freedom:
        kfree(event.msg_buf);
+unlock:
+       mutex_unlock(&adapter->crit_lock);
 out:
        /* re-enable Admin queue interrupt cause */
        iavf_misc_irq_enable(adapter);
@@ -4315,6 +4368,7 @@ static int iavf_close(struct net_device *netdev)
 static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
 {
        struct iavf_adapter *adapter = netdev_priv(netdev);
+       int ret = 0;
 
        netdev_dbg(netdev, "changing MTU from %d to %d\n",
                   netdev->mtu, new_mtu);
@@ -4325,11 +4379,15 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
        }
 
        if (netif_running(netdev)) {
-               adapter->flags |= IAVF_FLAG_RESET_NEEDED;
-               queue_work(adapter->wq, &adapter->reset_task);
+               iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+               ret = iavf_wait_for_reset(adapter);
+               if (ret < 0)
+                       netdev_warn(netdev, "MTU change was interrupted while waiting for reset\n");
+               else if (ret)
+                       netdev_warn(netdev, "MTU change timed out waiting for reset\n");
        }
 
-       return 0;
+       return ret;
 }
 
 #define NETIF_VLAN_OFFLOAD_FEATURES    (NETIF_F_HW_VLAN_CTAG_RX | \
@@ -4922,6 +4980,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        INIT_WORK(&adapter->reset_task, iavf_reset_task);
        INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
+       INIT_WORK(&adapter->finish_config, iavf_finish_config);
        INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
        INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
        queue_delayed_work(adapter->wq, &adapter->watchdog_task,
@@ -4930,6 +4989,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        /* Setup the wait queue for indicating transition to down status */
        init_waitqueue_head(&adapter->down_waitqueue);
 
+       /* Setup the wait queue for indicating transition to running state */
+       init_waitqueue_head(&adapter->reset_waitqueue);
+
        /* Setup the wait queue for indicating virtchannel events */
        init_waitqueue_head(&adapter->vc_waitqueue);
 
@@ -5061,13 +5123,15 @@ static void iavf_remove(struct pci_dev *pdev)
                usleep_range(500, 1000);
        }
        cancel_delayed_work_sync(&adapter->watchdog_task);
+       cancel_work_sync(&adapter->finish_config);
 
+       rtnl_lock();
        if (adapter->netdev_registered) {
-               rtnl_lock();
                unregister_netdevice(netdev);
                adapter->netdev_registered = false;
-               rtnl_unlock();
        }
+       rtnl_unlock();
+
        if (CLIENT_ALLOWED(adapter)) {
                err = iavf_lan_del_device(adapter);
                if (err)
index 7c0578b..be3c007 100644
@@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
                case VIRTCHNL_EVENT_RESET_IMPENDING:
                        dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
                        if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
-                               adapter->flags |= IAVF_FLAG_RESET_PENDING;
                                dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
-                               queue_work(adapter->wq, &adapter->reset_task);
+                               iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
                        }
                        break;
                default:
@@ -2237,6 +2236,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
 
                iavf_process_config(adapter);
                adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+               iavf_schedule_finish_config(adapter);
 
                iavf_set_queue_vlan_tag_loc(adapter);
 
@@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
        case VIRTCHNL_OP_ENABLE_QUEUES:
                /* enable transmits */
                iavf_irq_enable(adapter, true);
+               wake_up(&adapter->reset_waitqueue);
                adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED;
                break;
        case VIRTCHNL_OP_DISABLE_QUEUES:
index 4a12316..074bf94 100644
@@ -435,7 +435,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
        /* Receive Packet Data Buffer Size.
         * The Packet Data Buffer Size is defined in 128 byte units.
         */
-       rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+       rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
+                                    BIT_ULL(ICE_RLAN_CTX_DBUF_S));
 
        /* use 32 byte descriptors */
        rlan_ctx.dsize = 1;
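
ICE_RLAN_CTX_DBUF_S is the shift for the 128-byte units, so the old plain right shift silently dropped any remainder of the buffer length; a stand-alone illustration of the difference (the buffer length is a made-up value):

#include <stdio.h>

#define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned int rx_buf_len = 3000;   /* illustrative, not a real ring config */

	printf("shift:        %u units\n", rx_buf_len >> 7);               /* 23 */
	printf("DIV_ROUND_UP: %u units\n", DIV_ROUND_UP(rx_buf_len, 128)); /* 24 */
	return 0;
}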
@@ -800,6 +801,8 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
 
        ice_for_each_q_vector(vsi, v_idx)
                ice_free_q_vector(vsi, v_idx);
+
+       vsi->num_q_vectors = 0;
 }
 
 /**
index ad0a007..8f232c4 100644
@@ -538,6 +538,12 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
                break;
        case DEVLINK_ESWITCH_MODE_SWITCHDEV:
        {
+               if (ice_is_adq_active(pf)) {
+                       dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+                       NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+                       return -EOPNOTSUPP;
+               }
+
                dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
                         pf->hw.pf_id);
                NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
index 8d5cbbd..ad4d470 100644
@@ -2681,8 +2681,13 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
 
        ring->rx_max_pending = ICE_MAX_NUM_DESC;
        ring->tx_max_pending = ICE_MAX_NUM_DESC;
-       ring->rx_pending = vsi->rx_rings[0]->count;
-       ring->tx_pending = vsi->tx_rings[0]->count;
+       if (vsi->tx_rings && vsi->rx_rings) {
+               ring->rx_pending = vsi->rx_rings[0]->count;
+               ring->tx_pending = vsi->tx_rings[0]->count;
+       } else {
+               ring->rx_pending = 0;
+               ring->tx_pending = 0;
+       }
 
        /* Rx mini and jumbo rings are not supported */
        ring->rx_mini_max_pending = 0;
@@ -2716,6 +2721,10 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
                return -EINVAL;
        }
 
+       /* Return if there are no rings (device is reloading) */
+       if (!vsi->tx_rings || !vsi->rx_rings)
+               return -EBUSY;
+
        new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
        if (new_tx_cnt != ring->tx_pending)
                netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
index ead6d50..8c6e13f 100644
@@ -1281,16 +1281,21 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
                                     ICE_FLOW_FLD_OFF_INVAL);
        }
 
-       /* add filter for outer headers */
        fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
+
+       assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter);
+
+       /* add filter for outer headers */
        ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
                                        ICE_FD_HW_SEG_NON_TUN);
-       if (ret == -EEXIST)
-               /* Rule already exists, free memory and continue */
-               devm_kfree(dev, seg);
-       else if (ret)
+       if (ret == -EEXIST) {
+               /* Rule already exists, free memory and count as success */
+               ret = 0;
+               goto err_exit;
+       } else if (ret) {
                /* could not write filter, free memory */
                goto err_exit;
+       }
 
        /* make tunneled filter HW entries if possible */
        memcpy(&tun_seg[1], seg, sizeof(*seg));
@@ -1305,18 +1310,13 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
                devm_kfree(dev, tun_seg);
        }
 
-       if (perfect_filter)
-               set_bit(fltr_idx, hw->fdir_perfect_fltr);
-       else
-               clear_bit(fltr_idx, hw->fdir_perfect_fltr);
-
        return ret;
 
 err_exit:
        devm_kfree(dev, tun_seg);
        devm_kfree(dev, seg);
 
-       return -EOPNOTSUPP;
+       return ret;
 }
 
 /**
@@ -1914,7 +1914,9 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
        input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
 
        /* input struct is added to the HW filter list */
-       ice_fdir_update_list_entry(pf, input, fsp->location);
+       ret = ice_fdir_update_list_entry(pf, input, fsp->location);
+       if (ret)
+               goto release_lock;
 
        ret = ice_fdir_write_all_fltr(pf, input, true);
        if (ret)
index 00e3afd..0054d7e 100644
@@ -2972,39 +2972,12 @@ int ice_vsi_release(struct ice_vsi *vsi)
                return -ENODEV;
        pf = vsi->back;
 
-       /* do not unregister while driver is in the reset recovery pending
-        * state. Since reset/rebuild happens through PF service task workqueue,
-        * it's not a good idea to unregister netdev that is associated to the
-        * PF that is running the work queue items currently. This is done to
-        * avoid check_flush_dependency() warning on this wq
-        */
-       if (vsi->netdev && !ice_is_reset_in_progress(pf->state) &&
-           (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) {
-               unregister_netdev(vsi->netdev);
-               clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
-       }
-
-       if (vsi->type == ICE_VSI_PF)
-               ice_devlink_destroy_pf_port(pf);
-
        if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
                ice_rss_clean(vsi);
 
        ice_vsi_close(vsi);
        ice_vsi_decfg(vsi);
 
-       if (vsi->netdev) {
-               if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) {
-                       unregister_netdev(vsi->netdev);
-                       clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
-               }
-               if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) {
-                       free_netdev(vsi->netdev);
-                       vsi->netdev = NULL;
-                       clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
-               }
-       }
-
        /* retain SW VSI data structure since it is needed to unregister and
         * free the VSI netdev when the PF is not in reset recovery pending
         * state, for example during rmmod.
index 93979ab..b40dfe6 100644
@@ -4430,9 +4430,9 @@ static int ice_start_eth(struct ice_vsi *vsi)
        if (err)
                return err;
 
-       rtnl_lock();
        err = ice_vsi_open(vsi);
-       rtnl_unlock();
+       if (err)
+               ice_fltr_remove_all(vsi);
 
        return err;
 }
@@ -4895,6 +4895,7 @@ int ice_load(struct ice_pf *pf)
        params = ice_vsi_to_params(vsi);
        params.flags = ICE_VSI_FLAG_INIT;
 
+       rtnl_lock();
        err = ice_vsi_cfg(vsi, &params);
        if (err)
                goto err_vsi_cfg;
@@ -4902,6 +4903,7 @@ int ice_load(struct ice_pf *pf)
        err = ice_start_eth(ice_get_main_vsi(pf));
        if (err)
                goto err_start_eth;
+       rtnl_unlock();
 
        err = ice_init_rdma(pf);
        if (err)
@@ -4916,9 +4918,11 @@ int ice_load(struct ice_pf *pf)
 
 err_init_rdma:
        ice_vsi_close(ice_get_main_vsi(pf));
+       rtnl_lock();
 err_start_eth:
        ice_vsi_decfg(ice_get_main_vsi(pf));
 err_vsi_cfg:
+       rtnl_unlock();
        ice_deinit_dev(pf);
        return err;
 }
@@ -4931,8 +4935,10 @@ void ice_unload(struct ice_pf *pf)
 {
        ice_deinit_features(pf);
        ice_deinit_rdma(pf);
+       rtnl_lock();
        ice_stop_eth(ice_get_main_vsi(pf));
        ice_vsi_decfg(ice_get_main_vsi(pf));
+       rtnl_unlock();
        ice_deinit_dev(pf);
 }
 
@@ -5739,6 +5745,13 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
        q_handle = vsi->tx_rings[queue_index]->q_handle;
        tc = ice_dcb_get_tc(vsi, queue_index);
 
+       vsi = ice_locate_vsi_using_queue(vsi, queue_index);
+       if (!vsi) {
+               netdev_err(netdev, "Invalid VSI for given queue %d\n",
+                          queue_index);
+               return -EINVAL;
+       }
+
        /* Set BW back to default, when user set maxrate to 0 */
        if (!maxrate)
                status = ice_cfg_q_bw_dflt_lmt(vsi->port_info, vsi->idx, tc,
@@ -7872,10 +7885,10 @@ static int
 ice_validate_mqprio_qopt(struct ice_vsi *vsi,
                         struct tc_mqprio_qopt_offload *mqprio_qopt)
 {
-       u64 sum_max_rate = 0, sum_min_rate = 0;
        int non_power_of_2_qcount = 0;
        struct ice_pf *pf = vsi->back;
        int max_rss_q_cnt = 0;
+       u64 sum_min_rate = 0;
        struct device *dev;
        int i, speed;
        u8 num_tc;
@@ -7891,6 +7904,7 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
        dev = ice_pf_to_dev(pf);
        vsi->ch_rss_size = 0;
        num_tc = mqprio_qopt->qopt.num_tc;
+       speed = ice_get_link_speed_kbps(vsi);
 
        for (i = 0; num_tc; i++) {
                int qcount = mqprio_qopt->qopt.count[i];
@@ -7931,7 +7945,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
                 */
                max_rate = mqprio_qopt->max_rate[i];
                max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR);
-               sum_max_rate += max_rate;
 
                /* min_rate is minimum guaranteed rate and it can't be zero */
                min_rate = mqprio_qopt->min_rate[i];
@@ -7944,6 +7957,12 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
                        return -EINVAL;
                }
 
+               if (max_rate && max_rate > speed) {
+                       dev_err(dev, "TC%d: max_rate(%llu Kbps) > link speed of %u Kbps\n",
+                               i, max_rate, speed);
+                       return -EINVAL;
+               }
+
                iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem);
                if (rem) {
                        dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps",
@@ -7981,12 +8000,6 @@ ice_validate_mqprio_qopt(struct ice_vsi *vsi,
            (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
                return -EINVAL;
 
-       speed = ice_get_link_speed_kbps(vsi);
-       if (sum_max_rate && sum_max_rate > (u64)speed) {
-               dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n",
-                       sum_max_rate, speed);
-               return -EINVAL;
-       }
        if (sum_min_rate && sum_min_rate > (u64)speed) {
                dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n",
                        sum_min_rate, speed);
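
The net effect is that each TC's max_rate is now validated against link speed individually rather than as a sum, presumably because max_rate is a ceiling rather than a reservation, so several TCs may each be allowed to burst up to line rate. A stand-alone numeric check (link speed and rates are made-up values):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t speed = 10000000;                   /* 10 Gbps link, in Kbps */
	uint64_t max_rate[2] = { 6000000, 6000000 }; /* 6 Gbps ceiling per TC */
	int i, ok = 1;

	/* Per-TC rule: reject only when one TC alone exceeds link speed. */
	for (i = 0; i < 2; i++)
		if (max_rate[i] > speed)
			ok = 0;

	/* The removed sum check would have rejected this config:
	 * 6 + 6 = 12 Gbps > 10 Gbps.
	 */
	printf("%s\n", ok ? "accepted" : "rejected");   /* accepted */
	return 0;
}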
@@ -8800,6 +8813,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
 {
        struct ice_netdev_priv *np = netdev_priv(netdev);
        struct ice_pf *pf = np->vsi->back;
+       bool locked = false;
        int err;
 
        switch (type) {
@@ -8809,10 +8823,32 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
                                                  ice_setup_tc_block_cb,
                                                  np, np, true);
        case TC_SETUP_QDISC_MQPRIO:
+               if (ice_is_eswitch_mode_switchdev(pf)) {
+                       netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
+                       return -EOPNOTSUPP;
+               }
+
+               if (pf->adev) {
+                       mutex_lock(&pf->adev_mutex);
+                       device_lock(&pf->adev->dev);
+                       locked = true;
+                       if (pf->adev->dev.driver) {
+                               netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
+                               err = -EBUSY;
+                               goto adev_unlock;
+                       }
+               }
+
                /* setup traffic classifier for receive side */
                mutex_lock(&pf->tc_mutex);
                err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
                mutex_unlock(&pf->tc_mutex);
+
+adev_unlock:
+               if (locked) {
+                       device_unlock(&pf->adev->dev);
+                       mutex_unlock(&pf->adev_mutex);
+               }
                return err;
        default:
                return -EOPNOTSUPP;
index 1f66914..31314e7 100644
@@ -1131,7 +1131,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
        if (!vf)
                return -EINVAL;
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
@@ -1246,7 +1246,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
                goto out_put_vf;
        }
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
@@ -1300,7 +1300,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
                return -EOPNOTSUPP;
        }
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
@@ -1613,7 +1613,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
        if (!vf)
                return -EINVAL;
 
-       ret = ice_check_vf_ready_for_reset(vf);
+       ret = ice_check_vf_ready_for_cfg(vf);
        if (ret)
                goto out_put_vf;
 
index b54052e..4a34ef5 100644
@@ -750,17 +750,16 @@ exit:
 /**
  * ice_locate_vsi_using_queue - locate VSI using queue (forward to queue action)
  * @vsi: Pointer to VSI
- * @tc_fltr: Pointer to tc_flower_filter
+ * @queue: Queue index
  *
- * Locate the VSI using specified queue. When ADQ is not enabled, always
- * return input VSI, otherwise locate corresponding VSI based on per channel
- * offset and qcount
+ * Locate the VSI using the specified queue. When ADQ is not enabled,
+ * always return the input VSI; otherwise locate the corresponding
+ * VSI based on the per-channel offset and qcount.
  */
-static struct ice_vsi *
-ice_locate_vsi_using_queue(struct ice_vsi *vsi,
-                          struct ice_tc_flower_fltr *tc_fltr)
+struct ice_vsi *
+ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue)
 {
-       int num_tc, tc, queue;
+       int num_tc, tc;
 
        /* if ADQ is not active, passed VSI is the candidate VSI */
        if (!ice_is_adq_active(vsi->back))
@@ -770,7 +769,6 @@ ice_locate_vsi_using_queue(struct ice_vsi *vsi,
         * upon queue number)
         */
        num_tc = vsi->mqprio_qopt.qopt.num_tc;
-       queue = tc_fltr->action.fwd.q.queue;
 
        for (tc = 0; tc < num_tc; tc++) {
                int qcount = vsi->mqprio_qopt.qopt.count[tc];
@@ -812,6 +810,7 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr)
        struct ice_pf *pf = vsi->back;
        struct device *dev;
        u32 tc_class;
+       int q;
 
        dev = ice_pf_to_dev(pf);
 
@@ -840,7 +839,8 @@ ice_tc_forward_action(struct ice_vsi *vsi, struct ice_tc_flower_fltr *tc_fltr)
                /* Determine destination VSI even though the action is
                 * FWD_TO_QUEUE, because QUEUE is associated with VSI
                 */
-               dest_vsi = tc_fltr->dest_vsi;
+               q = tc_fltr->action.fwd.q.queue;
+               dest_vsi = ice_locate_vsi_using_queue(vsi, q);
                break;
        default:
                dev_err(dev,
@@ -1716,7 +1716,7 @@ ice_tc_forward_to_queue(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr,
        /* If ADQ is configured, and the queue belongs to ADQ VSI, then prepare
         * ADQ switch filter
         */
-       ch_vsi = ice_locate_vsi_using_queue(vsi, fltr);
+       ch_vsi = ice_locate_vsi_using_queue(vsi, fltr->action.fwd.q.queue);
        if (!ch_vsi)
                return -EINVAL;
        fltr->dest_vsi = ch_vsi;
index 8bbc1a6..65d3871 100644
@@ -204,6 +204,7 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf)
        return pf->num_dmac_chnl_fltrs;
 }
 
+struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue);
 int
 ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
                   struct flow_cls_offload *cls_flower);
index b26ce44..ea3310b 100644
@@ -186,25 +186,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
 }
 
 /**
- * ice_check_vf_ready_for_reset - check if VF is ready to be reset
- * @vf: VF to check if it's ready to be reset
- *
- * The purpose of this function is to ensure that the VF is not in reset,
- * disabled, and is both initialized and active, thus enabling us to safely
- * initialize another reset.
- */
-int ice_check_vf_ready_for_reset(struct ice_vf *vf)
-{
-       int ret;
-
-       ret = ice_check_vf_ready_for_cfg(vf);
-       if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
-               ret = -EAGAIN;
-
-       return ret;
-}
-
-/**
  * ice_trigger_vf_reset - Reset a VF on HW
  * @vf: pointer to the VF structure
  * @is_vflr: true if VFLR was issued, false if not
@@ -631,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
                return 0;
        }
 
+       if (flags & ICE_VF_RESET_LOCK)
+               mutex_lock(&vf->cfg_lock);
+       else
+               lockdep_assert_held(&vf->cfg_lock);
+
        if (ice_is_vf_disabled(vf)) {
                vsi = ice_get_vf_vsi(vf);
                if (!vsi) {
                        dev_dbg(dev, "VF is already removed\n");
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto out_unlock;
                }
                ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
 
@@ -644,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
 
                dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
                        vf->vf_id);
-               return 0;
+               goto out_unlock;
        }
 
-       if (flags & ICE_VF_RESET_LOCK)
-               mutex_lock(&vf->cfg_lock);
-       else
-               lockdep_assert_held(&vf->cfg_lock);
-
        /* Set VF disable bit state here, before triggering reset */
        set_bit(ICE_VF_STATE_DIS, vf->vf_states);
        ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
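
Taking the lock before the is-disabled check closes the path that returned early while skipping the unlock; the surrounding take-or-assert idiom generalizes as below (a generic sketch, all names are placeholders):

	static int do_locked_op(struct my_obj *obj, bool take_lock)
	{
		if (take_lock)
			mutex_lock(&obj->lock);
		else
			lockdep_assert_held(&obj->lock); /* checked only with lockdep on */

		/* ... critical section: every exit path below must unlock ... */

		if (take_lock)
			mutex_unlock(&obj->lock);
		return 0;
	}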
index 67172fd..48fea6f 100644
@@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf);
 struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
 bool ice_is_vf_disabled(struct ice_vf *vf);
 int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
-int ice_check_vf_ready_for_reset(struct ice_vf *vf);
 void ice_set_vf_state_dis(struct ice_vf *vf);
 bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
 void
index efbc296..dcf628b 100644
@@ -3947,7 +3947,6 @@ error_handler:
                ice_vc_notify_vf_link_state(vf);
                break;
        case VIRTCHNL_OP_RESET_VF:
-               clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
                ops->reset_vf(vf);
                break;
        case VIRTCHNL_OP_ADD_ETH_ADDR:
index 405886e..319c544 100644
@@ -1385,18 +1385,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
                return;
        }
 
-       spin_lock_init(&adapter->tmreg_lock);
-       INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
-
-       if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
-               INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
-                                 igb_ptp_overflow_check);
-
-       adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
-       adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
-
-       igb_ptp_reset(adapter);
-
        adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
                                                &adapter->pdev->dev);
        if (IS_ERR(adapter->ptp_clock)) {
@@ -1406,6 +1394,18 @@ void igb_ptp_init(struct igb_adapter *adapter)
                dev_info(&adapter->pdev->dev, "added PHC on %s\n",
                         adapter->netdev->name);
                adapter->ptp_flags |= IGB_PTP_ENABLED;
+
+               spin_lock_init(&adapter->tmreg_lock);
+               INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+               if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+                       INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+                                         igb_ptp_overflow_check);
+
+               adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+               adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+               igb_ptp_reset(adapter);
        }
 }
 
index 00a5ee4..38901d2 100644
@@ -14,6 +14,7 @@
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
 #include <linux/bitfield.h>
+#include <linux/hrtimer.h>
 
 #include "igc_hw.h"
 
@@ -101,6 +102,8 @@ struct igc_ring {
        u32 start_time;
        u32 end_time;
        u32 max_sdu;
+       bool oper_gate_closed;          /* Operating gate. True if the TX Queue is closed */
+       bool admin_gate_closed;         /* Future gate. True if the TX Queue will be closed */
 
        /* CBS parameters */
        bool cbs_enable;                /* indicates if CBS is enabled */
@@ -160,6 +163,7 @@ struct igc_adapter {
        struct timer_list watchdog_timer;
        struct timer_list dma_err_timer;
        struct timer_list phy_info_timer;
+       struct hrtimer hrtimer;
 
        u32 wol;
        u32 en_mng_pt;
@@ -184,10 +188,17 @@ struct igc_adapter {
        u32 max_frame_size;
        u32 min_frame_size;
 
+       int tc_setup_type;
        ktime_t base_time;
        ktime_t cycle_time;
-       bool qbv_enable;
+       bool taprio_offload_enable;
        u32 qbv_config_change_errors;
+       bool qbv_transition;
+       unsigned int qbv_count;
+       /* Access to oper_gate_closed, admin_gate_closed and qbv_transition
+        * is protected by the qbv_tx_lock.
+        */
+       spinlock_t qbv_tx_lock;
 
        /* OS defined structs */
        struct pci_dev *pdev;
index 44a5070..2f780cc 100644
 #define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */
 #define IGC_PTM_CTRL_EN                BIT(30) /* Enable PTM */
 #define IGC_PTM_CTRL_TRIG      BIT(31) /* PTM Cycle trigger */
-#define IGC_PTM_CTRL_SHRT_CYC(usec)    (((usec) & 0x2f) << 2)
+#define IGC_PTM_CTRL_SHRT_CYC(usec)    (((usec) & 0x3f) << 2)
 #define IGC_PTM_CTRL_PTM_TO(usec)      (((usec) & 0xff) << 8)
 
 #define IGC_PTM_SHORT_CYC_DEFAULT      10  /* Default Short/interrupted cycle interval */
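
The short-cycle field is six bits wide, and the old mask 0x2f (0b101111) is missing bit 4, so any interval with that bit set was silently corrupted; a stand-alone demonstration:

#include <stdio.h>

int main(void)
{
	unsigned int usec = 16;   /* any value with bit 4 set shows the bug */

	printf("0x2f mask: %u\n", usec & 0x2f);   /* 0  - bit 4 lost */
	printf("0x3f mask: %u\n", usec & 0x3f);   /* 16 - preserved  */
	return 0;
}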
index 0e2cb00..93bce72 100644
@@ -1708,6 +1708,8 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
        /* twisted pair */
        cmd->base.port = PORT_TP;
        cmd->base.phy_address = hw->phy.addr;
+       ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
+       ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
 
        /* advertising link modes */
        if (hw->phy.autoneg_advertised & ADVERTISE_10_HALF)
index 019ce91..6f557e8 100644
@@ -316,6 +316,33 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
                        igc_clean_tx_ring(adapter->tx_ring[i]);
 }
 
+static void igc_disable_tx_ring_hw(struct igc_ring *ring)
+{
+       struct igc_hw *hw = &ring->q_vector->adapter->hw;
+       u8 idx = ring->reg_idx;
+       u32 txdctl;
+
+       txdctl = rd32(IGC_TXDCTL(idx));
+       txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
+       txdctl |= IGC_TXDCTL_SWFLUSH;
+       wr32(IGC_TXDCTL(idx), txdctl);
+}
+
+/**
+ * igc_disable_all_tx_rings_hw - Disable all transmit queue operation
+ * @adapter: board private structure
+ */
+static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+               igc_disable_tx_ring_hw(tx_ring);
+       }
+}
+
 /**
  * igc_setup_tx_resources - allocate Tx resources (Descriptors)
  * @tx_ring: tx descriptor ring (for a specific queue) to setup
@@ -711,7 +738,6 @@ static void igc_configure_tx_ring(struct igc_adapter *adapter,
        /* disable the queue */
        wr32(IGC_TXDCTL(reg_idx), 0);
        wrfl();
-       mdelay(10);
 
        wr32(IGC_TDLEN(reg_idx),
             ring->count * sizeof(union igc_adv_tx_desc));
@@ -1017,7 +1043,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
        ktime_t base_time = adapter->base_time;
        ktime_t now = ktime_get_clocktai();
        ktime_t baset_est, end_of_cycle;
-       u32 launchtime;
+       s32 launchtime;
        s64 n;
 
        n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
@@ -1030,7 +1056,7 @@ static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
                        *first_flag = true;
                        ring->last_ff_cycle = baset_est;
 
-                       if (ktime_compare(txtime, ring->last_tx_cycle) > 0)
+                       if (ktime_compare(end_of_cycle, ring->last_tx_cycle) > 0)
                                *insert_empty = true;
                }
        }
@@ -1573,16 +1599,12 @@ done:
        first->bytecount = skb->len;
        first->gso_segs = 1;
 
-       if (tx_ring->max_sdu > 0) {
-               u32 max_sdu = 0;
-
-               max_sdu = tx_ring->max_sdu +
-                         (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0);
+       if (adapter->qbv_transition || tx_ring->oper_gate_closed)
+               goto out_drop;
 
-               if (first->bytecount > max_sdu) {
-                       adapter->stats.txdrop++;
-                       goto out_drop;
-               }
+       if (tx_ring->max_sdu > 0 && first->bytecount > tx_ring->max_sdu) {
+               adapter->stats.txdrop++;
+               goto out_drop;
        }
 
        if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
@@ -2833,9 +2855,8 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
        struct netdev_queue *nq = txring_txq(ring);
        union igc_adv_tx_desc *tx_desc = NULL;
        int cpu = smp_processor_id();
-       u16 ntu = ring->next_to_use;
        struct xdp_desc xdp_desc;
-       u16 budget;
+       u16 budget, ntu;
 
        if (!netif_carrier_ok(ring->netdev))
                return;
@@ -2845,6 +2866,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
        /* Avoid transmit queue timeout since we share it with the slow path */
        txq_trans_cond_update(nq);
 
+       ntu = ring->next_to_use;
        budget = igc_desc_unused(ring);
 
        while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
@@ -3012,8 +3034,8 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
                    time_after(jiffies, tx_buffer->time_stamp +
                    (adapter->tx_timeout_factor * HZ)) &&
                    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) &&
-                   (rd32(IGC_TDH(tx_ring->reg_idx)) !=
-                    readl(tx_ring->tail))) {
+                   (rd32(IGC_TDH(tx_ring->reg_idx)) != readl(tx_ring->tail)) &&
+                   !tx_ring->oper_gate_closed) {
                        /* detected Tx unit hang */
                        netdev_err(tx_ring->netdev,
                                   "Detected Tx Unit Hang\n"
@@ -4779,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
        adapter->nfc_rule_count = 0;
 
        spin_lock_init(&adapter->stats64_lock);
+       spin_lock_init(&adapter->qbv_tx_lock);
        /* Assume MSI-X interrupts, will be checked during IRQ allocation */
        adapter->flags |= IGC_FLAG_HAS_MSIX;
 
@@ -5063,6 +5086,7 @@ void igc_down(struct igc_adapter *adapter)
        /* clear VLAN promisc flag so VFTA will be updated if necessary */
        adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
 
+       igc_disable_all_tx_rings_hw(adapter);
        igc_clean_all_tx_rings(adapter);
        igc_clean_all_rx_rings(adapter);
 }
@@ -6096,13 +6120,16 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
        return igc_tsn_offload_apply(adapter);
 }
 
-static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+static int igc_qbv_clear_schedule(struct igc_adapter *adapter)
 {
+       unsigned long flags;
        int i;
 
        adapter->base_time = 0;
        adapter->cycle_time = NSEC_PER_SEC;
+       adapter->taprio_offload_enable = false;
        adapter->qbv_config_change_errors = 0;
+       adapter->qbv_count = 0;
 
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *ring = adapter->tx_ring[i];
@@ -6112,6 +6139,26 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
                ring->max_sdu = 0;
        }
 
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+       adapter->qbv_transition = false;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+
+               ring->oper_gate_closed = false;
+               ring->admin_gate_closed = false;
+       }
+
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+       return 0;
+}
+
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+       igc_qbv_clear_schedule(adapter);
+
        return 0;
 }
 
@@ -6121,27 +6168,21 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
        bool queue_configured[IGC_MAX_TX_QUEUES] = { };
        struct igc_hw *hw = &adapter->hw;
        u32 start_time = 0, end_time = 0;
+       struct timespec64 now;
+       unsigned long flags;
        size_t n;
        int i;
 
-       switch (qopt->cmd) {
-       case TAPRIO_CMD_REPLACE:
-               adapter->qbv_enable = true;
-               break;
-       case TAPRIO_CMD_DESTROY:
-               adapter->qbv_enable = false;
-               break;
-       default:
-               return -EOPNOTSUPP;
-       }
-
-       if (!adapter->qbv_enable)
+       if (qopt->cmd == TAPRIO_CMD_DESTROY)
                return igc_tsn_clear_schedule(adapter);
 
+       if (qopt->cmd != TAPRIO_CMD_REPLACE)
+               return -EOPNOTSUPP;
+
        if (qopt->base_time < 0)
                return -ERANGE;
 
-       if (igc_is_device_id_i225(hw) && adapter->base_time)
+       if (igc_is_device_id_i225(hw) && adapter->taprio_offload_enable)
                return -EALREADY;
 
        if (!validate_schedule(adapter, qopt))
@@ -6149,6 +6190,9 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 
        adapter->cycle_time = qopt->cycle_time;
        adapter->base_time = qopt->base_time;
+       adapter->taprio_offload_enable = true;
+
+       igc_ptp_read(adapter, &now);
 
        for (n = 0; n < qopt->num_entries; n++) {
                struct tc_taprio_sched_entry *e = &qopt->entries[n];
@@ -6184,30 +6228,49 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
                                ring->start_time = start_time;
                        ring->end_time = end_time;
 
-                       queue_configured[i] = true;
+                       if (ring->start_time >= adapter->cycle_time)
+                               queue_configured[i] = false;
+                       else
+                               queue_configured[i] = true;
                }
 
                start_time += e->interval;
        }
 
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
        /* Check whether each queue gets configured.
         * If not, set both its start and end time to the end time.
         */
        for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *ring = adapter->tx_ring[i];
+
+               if (!is_base_time_past(qopt->base_time, &now)) {
+                       ring->admin_gate_closed = false;
+               } else {
+                       ring->oper_gate_closed = false;
+                       ring->admin_gate_closed = false;
+               }
+
                if (!queue_configured[i]) {
-                       struct igc_ring *ring = adapter->tx_ring[i];
+                       if (!is_base_time_past(qopt->base_time, &now))
+                               ring->admin_gate_closed = true;
+                       else
+                               ring->oper_gate_closed = true;
 
                        ring->start_time = end_time;
                        ring->end_time = end_time;
                }
        }
 
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
        for (i = 0; i < adapter->num_tx_queues; i++) {
                struct igc_ring *ring = adapter->tx_ring[i];
                struct net_device *dev = adapter->netdev;
 
                if (qopt->max_sdu[i])
-                       ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len;
+                       ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len - ETH_TLEN;
                else
                        ring->max_sdu = 0;
        }
@@ -6327,6 +6390,8 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
 {
        struct igc_adapter *adapter = netdev_priv(dev);
 
+       adapter->tc_setup_type = type;
+
        switch (type) {
        case TC_QUERY_CAPS:
                return igc_tc_query_caps(adapter, type_data);
@@ -6574,6 +6639,33 @@ static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
        .xmo_rx_timestamp               = igc_xdp_rx_timestamp,
 };
 
+static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
+{
+       struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
+                                                  hrtimer);
+       unsigned long flags;
+       unsigned int i;
+
+       spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+       adapter->qbv_transition = true;
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+               if (tx_ring->admin_gate_closed) {
+                       tx_ring->admin_gate_closed = false;
+                       tx_ring->oper_gate_closed = true;
+               } else {
+                       tx_ring->oper_gate_closed = false;
+               }
+       }
+       adapter->qbv_transition = false;
+
+       spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+       return HRTIMER_NORESTART;
+}
+
 /**
  * igc_probe - Device Initialization Routine
  * @pdev: PCI device information struct
@@ -6752,6 +6844,9 @@ static int igc_probe(struct pci_dev *pdev,
        INIT_WORK(&adapter->reset_task, igc_reset_task);
        INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
 
+       hrtimer_init(&adapter->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+       adapter->hrtimer.function = &igc_qbv_scheduling_timer;
+
        /* Initialize link properties that are user-changeable */
        adapter->fc_autoneg = true;
        hw->mac.autoneg = true;
@@ -6855,6 +6950,7 @@ static void igc_remove(struct pci_dev *pdev)
 
        cancel_work_sync(&adapter->reset_task);
        cancel_work_sync(&adapter->watchdog_task);
+       hrtimer_cancel(&adapter->hrtimer);
 
        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
         * would have already happened in close and is redundant.
@@ -7252,18 +7348,6 @@ void igc_enable_rx_ring(struct igc_ring *ring)
                igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
 }
 
-static void igc_disable_tx_ring_hw(struct igc_ring *ring)
-{
-       struct igc_hw *hw = &ring->q_vector->adapter->hw;
-       u8 idx = ring->reg_idx;
-       u32 txdctl;
-
-       txdctl = rd32(IGC_TXDCTL(idx));
-       txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
-       txdctl |= IGC_TXDCTL_SWFLUSH;
-       wr32(IGC_TXDCTL(idx), txdctl);
-}
-
 void igc_disable_tx_ring(struct igc_ring *ring)
 {
        igc_disable_tx_ring_hw(ring);
index 32ef112..f0b979a 100644 (file)
@@ -356,16 +356,35 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
                        tsim &= ~IGC_TSICR_TT0;
                }
                if (on) {
+                       struct timespec64 safe_start;
                        int i = rq->perout.index;
 
                        igc_pin_perout(igc, i, pin, use_freq);
-                       igc->perout[i].start.tv_sec = rq->perout.start.sec;
+                       igc_ptp_read(igc, &safe_start);
+
+                       /* The PPS output start time is triggered by the
+                        * Target Time (TT) register. Programming any past
+                        * time value into TT will cause PPS to never
+                        * start, so we must program the TT register with
+                        * a time in the future. There is no stringent
+                        * need to fire PPS out right away; adding
+                        * +2 seconds covers the corner cases, e.g. if
+                        * SYSTIML is close to wrapping and the timer
+                        * keeps ticking as we program the register,
+                        * +2 seconds is a safe bet.
+                        */
+                       safe_start.tv_sec += 2;
+
+                       if (rq->perout.start.sec < safe_start.tv_sec)
+                               igc->perout[i].start.tv_sec = safe_start.tv_sec;
+                       else
+                               igc->perout[i].start.tv_sec = rq->perout.start.sec;
                        igc->perout[i].start.tv_nsec = rq->perout.start.nsec;
                        igc->perout[i].period.tv_sec = ts.tv_sec;
                        igc->perout[i].period.tv_nsec = ts.tv_nsec;
-                       wr32(trgttimh, rq->perout.start.sec);
+                       wr32(trgttimh, (u32)igc->perout[i].start.tv_sec);
                        /* For now, always select timer 0 as source. */
-                       wr32(trgttiml, rq->perout.start.nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+                       wr32(trgttiml, (u32)(igc->perout[i].start.tv_nsec |
+                                            IGC_TT_IO_TIMER_SEL_SYSTIM0));
                        if (use_freq)
                                wr32(freqout, ns);
                        tsauxc |= tsauxc_mask;
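
A minimal sketch of the clamping policy above as a standalone program;
clamp_perout_start() is a hypothetical helper, and clock_gettime() stands
in for the driver's igc_ptp_read() of SYSTIM:

#include <stdio.h>
#include <time.h>

/* Never program a start second earlier than "now + 2" so the target
 * time is always in the future, even if the clock ticks while the
 * registers are being written.
 */
static time_t clamp_perout_start(time_t requested_sec, time_t now_sec)
{
	time_t safe_sec = now_sec + 2;

	return requested_sec < safe_sec ? safe_sec : requested_sec;
}

int main(void)
{
	struct timespec now;

	clock_gettime(CLOCK_REALTIME, &now);
	printf("start=%ld\n", (long)clamp_perout_start(0, now.tv_sec));
	return 0;
}
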
index 94a2b0d..a9c0832 100644 (file)
@@ -37,7 +37,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
 {
        unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
 
-       if (adapter->qbv_enable)
+       if (adapter->taprio_offload_enable)
                new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
 
        if (is_any_launchtime(adapter))
@@ -114,7 +114,6 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter)
 static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 {
        struct igc_hw *hw = &adapter->hw;
-       bool tsn_mode_reconfig = false;
        u32 tqavctrl, baset_l, baset_h;
        u32 sec, nsec, cycle;
        ktime_t base_time, systim;
@@ -133,8 +132,28 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
                wr32(IGC_STQT(i), ring->start_time);
                wr32(IGC_ENDQT(i), ring->end_time);
 
-               txqctl |= IGC_TXQCTL_STRICT_CYCLE |
-                       IGC_TXQCTL_STRICT_END;
+               if (adapter->taprio_offload_enable) {
+                       /* If taprio_offload_enable is set, we are in
+                        * "taprio" mode and we need to be strict about
+                        * the cycles: only transmit a packet if it can
+                        * be completed during that cycle.
+                        *
+                        * If taprio_offload_enable is NOT true when
+                        * enabling TSN offload, the cycle is only used
+                        * internally to adapt the base time register
+                        * after a second has passed, and should have no
+                        * external effects.
+                        *
+                        * Enabling strict mode in that case would
+                        * unnecessarily prevent the transmission of
+                        * certain packets (e.g. at the boundary of a
+                        * second) and thus interfere with the launchtime
+                        * feature, which promises transmission at a
+                        * certain point in time.
+                        */
+                       txqctl |= IGC_TXQCTL_STRICT_CYCLE |
+                               IGC_TXQCTL_STRICT_END;
+               }
 
                if (ring->launchtime_enable)
                        txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
@@ -228,11 +247,10 @@ skip_cbs:
 
        tqavctrl = rd32(IGC_TQAVCTRL) & ~IGC_TQAVCTRL_FUTSCDDIS;
 
-       if (tqavctrl & IGC_TQAVCTRL_TRANSMIT_MODE_TSN)
-               tsn_mode_reconfig = true;
-
        tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
 
+       adapter->qbv_count++;
+
        cycle = adapter->cycle_time;
        base_time = adapter->base_time;
 
@@ -249,17 +267,29 @@ skip_cbs:
                 * Gate Control List (GCL) is running.
                 */
                if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) &&
-                   tsn_mode_reconfig)
+                   (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) &&
+                   (adapter->qbv_count > 1))
                        adapter->qbv_config_change_errors++;
        } else {
-               /* According to datasheet section 7.5.2.9.3.3, FutScdDis bit
-                * has to be configured before the cycle time and base time.
-                * Tx won't hang if there is a GCL is already running,
-                * so in this case we don't need to set FutScdDis.
-                */
-               if (igc_is_device_id_i226(hw) &&
-                   !(rd32(IGC_BASET_H) || rd32(IGC_BASET_L)))
-                       tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS;
+               if (igc_is_device_id_i226(hw)) {
+                       ktime_t adjust_time, expires_time;
+
+                       /* According to datasheet section 7.5.2.9.3.3, the
+                        * FutScdDis bit has to be configured before the
+                        * cycle time and base time. Tx won't hang if a GCL
+                        * is already running, so in this case we don't
+                        * need to set FutScdDis.
+                        */
+                       if (!(rd32(IGC_BASET_H) || rd32(IGC_BASET_L)))
+                               tqavctrl |= IGC_TQAVCTRL_FUTSCDDIS;
+
+                       nsec = rd32(IGC_SYSTIML);
+                       sec = rd32(IGC_SYSTIMH);
+                       systim = ktime_set(sec, nsec);
+
+                       adjust_time = adapter->base_time;
+                       expires_time = ktime_sub_ns(adjust_time, systim);
+                       hrtimer_start(&adapter->hrtimer, expires_time, HRTIMER_MODE_REL);
+               }
        }
 
        wr32(IGC_TQAVCTRL, tqavctrl);
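
The hunk above arms the hrtimer with a relative expiry because
HRTIMER_MODE_REL expects an offset from now, so the absolute QBV base
time has to be reduced by the current SYSTIM value first. A tiny sketch
of that arithmetic, with plain signed nanoseconds standing in for
ktime_t:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Equivalent of expires_time = ktime_sub_ns(adjust_time, systim). */
static int64_t rel_expiry_ns(int64_t base_time_ns, int64_t systim_ns)
{
	return base_time_ns - systim_ns;
}

int main(void)
{
	/* Base time 2.0 s, SYSTIM currently at 1.5 s -> fire in 0.5 s. */
	printf("fire in %" PRId64 " ns\n",
	       rel_expiry_ns(2000000000LL, 1500000000LL));
	return 0;
}
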
@@ -305,7 +335,11 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter)
 {
        struct igc_hw *hw = &adapter->hw;
 
-       if (netif_running(adapter->netdev) && igc_is_device_id_i225(hw)) {
+       /* Per the I225/6 HW Design Section 7.5.2.1, the transmit mode
+        * cannot be changed dynamically; changing it requires resetting
+        * the adapter.
+        */
+       if (netif_running(adapter->netdev) &&
+           (igc_is_device_id_i225(hw) || !adapter->qbv_count)) {
                schedule_work(&adapter->reset_task);
                return 0;
        }
index 1726297..8eb9839 100644 (file)
@@ -8479,7 +8479,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
                struct ixgbe_adapter *adapter = q_vector->adapter;
 
                if (unlikely(skb_tail_pointer(skb) < hdr.network +
-                            VXLAN_HEADROOM))
+                            vxlan_headroom(0)))
                        return;
 
                /* verify the port is recognized as VXLAN */
index 2b9335c..8537578 100644 (file)
@@ -1302,11 +1302,10 @@ static int korina_probe(struct platform_device *pdev)
        else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0)
                eth_hw_addr_random(dev);
 
-       clk = devm_clk_get_optional(&pdev->dev, "mdioclk");
+       clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk");
        if (IS_ERR(clk))
                return PTR_ERR(clk);
        if (clk) {
-               clk_prepare_enable(clk);
                lp->mii_clock_freq = clk_get_rate(clk);
        } else {
                lp->mii_clock_freq = 200000000; /* max possible input clk */
index ff5647b..acf4f6b 100644 (file)
@@ -1511,7 +1511,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
                         */
                        if (txq_number == 1)
                                txq_map = (cpu == pp->rxq_def) ?
-                                       MVNETA_CPU_TXQ_ACCESS(1) : 0;
+                                       MVNETA_CPU_TXQ_ACCESS(0) : 0;
 
                } else {
                        txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
@@ -4356,7 +4356,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp)
                 */
                if (txq_number == 1)
                        txq_map = (cpu == elected_cpu) ?
-                               MVNETA_CPU_TXQ_ACCESS(1) : 0;
+                               MVNETA_CPU_TXQ_ACCESS(0) : 0;
                else
                        txq_map = mvreg_read(pp, MVNETA_CPU_MAP(cpu)) &
                                MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
index 035ead7..dab61cc 100644 (file)
@@ -98,6 +98,9 @@ int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox)
        writeq(OCTEP_CTRL_MBOX_STATUS_INIT,
               OCTEP_CTRL_MBOX_INFO_HOST_STATUS(mbox->barmem));
 
+       mutex_init(&mbox->h2fq_lock);
+       mutex_init(&mbox->f2hq_lock);
+
        mbox->h2fq.sz = readl(OCTEP_CTRL_MBOX_H2FQ_SZ(mbox->barmem));
        mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD(mbox->barmem);
        mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS(mbox->barmem);
index 1cc6af2..565320e 100644 (file)
@@ -55,7 +55,7 @@ static int octep_send_mbox_req(struct octep_device *oct,
        list_add_tail(&d->list, &oct->ctrl_req_wait_list);
        ret = wait_event_interruptible_timeout(oct->ctrl_req_wait_q,
                                               (d->done != 0),
-                                              jiffies + msecs_to_jiffies(500));
+                                              msecs_to_jiffies(500));
        list_del(&d->list);
        if (ret == 0 || ret == 1)
                return -EAGAIN;
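
The one-line change above matters because wait_event_interruptible_timeout()
takes a relative timeout in jiffies; passing jiffies + msecs_to_jiffies(500)
made the wait grow with system uptime. A userspace analogue of the same
relative-versus-absolute pitfall, sketched with pthreads (where, conversely,
pthread_cond_timedwait() wants an absolute deadline, so "now" must be added
explicitly):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static int done; /* the condition being waited for */

static int wait_done_ms(long ms)
{
	struct timespec deadline;
	int err = 0;

	/* Absolute deadline = now + relative timeout. */
	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += ms / 1000;
	deadline.tv_nsec += (ms % 1000) * 1000000L;
	if (deadline.tv_nsec >= 1000000000L) {
		deadline.tv_sec++;
		deadline.tv_nsec -= 1000000000L;
	}

	pthread_mutex_lock(&lock);
	while (!done && err == 0)
		err = pthread_cond_timedwait(&cond, &lock, &deadline);
	pthread_mutex_unlock(&lock);
	return err; /* 0 on wakeup, ETIMEDOUT on timeout */
}

int main(void)
{
	printf("wait returned %d\n", wait_done_ms(100));
	return 0;
}
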
index 43eb6e8..4424de2 100644 (file)
@@ -1038,6 +1038,10 @@ static void octep_device_cleanup(struct octep_device *oct)
 {
        int i;
 
+       oct->poll_non_ioq_intr = false;
+       cancel_delayed_work_sync(&oct->intr_poll_task);
+       cancel_work_sync(&oct->ctrl_mbox_task);
+
        dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n");
 
        for (i = 0; i < OCTEP_MAX_VF; i++) {
@@ -1200,14 +1204,11 @@ static void octep_remove(struct pci_dev *pdev)
        if (!oct)
                return;
 
-       cancel_work_sync(&oct->tx_timeout_task);
-       cancel_work_sync(&oct->ctrl_mbox_task);
        netdev = oct->netdev;
        if (netdev->reg_state == NETREG_REGISTERED)
                unregister_netdev(netdev);
 
-       oct->poll_non_ioq_intr = false;
-       cancel_delayed_work_sync(&oct->intr_poll_task);
+       cancel_work_sync(&oct->tx_timeout_task);
        octep_device_cleanup(oct);
        pci_release_mem_regions(pdev);
        free_netdev(netdev);
index 3411e2e..0ee420a 100644 (file)
@@ -208,7 +208,7 @@ struct ptp *ptp_get(void)
        /* Check driver is bound to PTP block */
        if (!ptp)
                ptp = ERR_PTR(-EPROBE_DEFER);
-       else
+       else if (!IS_ERR(ptp))
                pci_dev_get(ptp->pdev);
 
        return ptp;
@@ -388,11 +388,10 @@ static int ptp_extts_on(struct ptp *ptp, int on)
 static int ptp_probe(struct pci_dev *pdev,
                     const struct pci_device_id *ent)
 {
-       struct device *dev = &pdev->dev;
        struct ptp *ptp;
        int err;
 
-       ptp = devm_kzalloc(dev, sizeof(*ptp), GFP_KERNEL);
+       ptp = kzalloc(sizeof(*ptp), GFP_KERNEL);
        if (!ptp) {
                err = -ENOMEM;
                goto error;
@@ -428,20 +427,19 @@ static int ptp_probe(struct pci_dev *pdev,
        return 0;
 
 error_free:
-       devm_kfree(dev, ptp);
+       kfree(ptp);
 
 error:
        /* For `ptp_get()` we need to differentiate between the case
         * when the core has not tried to probe this device and the case when
-        * the probe failed.  In the later case we pretend that the
-        * initialization was successful and keep the error in
+        * the probe failed.  In the latter case we keep the error in
         * `dev->driver_data`.
         */
        pci_set_drvdata(pdev, ERR_PTR(err));
        if (!first_ptp_block)
                first_ptp_block = ERR_PTR(err);
 
-       return 0;
+       return err;
 }
 
 static void ptp_remove(struct pci_dev *pdev)
@@ -449,16 +447,17 @@ static void ptp_remove(struct pci_dev *pdev)
        struct ptp *ptp = pci_get_drvdata(pdev);
        u64 clock_cfg;
 
-       if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer))
-               hrtimer_cancel(&ptp->hrtimer);
-
        if (IS_ERR_OR_NULL(ptp))
                return;
 
+       if (cn10k_ptp_errata(ptp) && hrtimer_active(&ptp->hrtimer))
+               hrtimer_cancel(&ptp->hrtimer);
+
        /* Disable PTP clock */
        clock_cfg = readq(ptp->reg_base + PTP_CLOCK_CFG);
        clock_cfg &= ~PTP_CLOCK_CFG_PTP_EN;
        writeq(clock_cfg, ptp->reg_base + PTP_CLOCK_CFG);
+       kfree(ptp);
 }
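
A compilable sketch of the ERR_PTR-in-drvdata pattern the hunks above rely
on; the macros mirror the kernel's ERR_PTR()/IS_ERR() encoding and
ptp_get_model() is a hypothetical stand-in for ptp_get():

#include <stdio.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(long)(err))
#define PTR_ERR(ptr)	((long)(ptr))
#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

/* NULL: the driver never probed; ERR_PTR: probe ran and failed. */
static void *first_block;

static void *ptp_get_model(void)
{
	if (!first_block)
		return ERR_PTR(-517); /* modeling -EPROBE_DEFER */
	return first_block;           /* valid pointer or stored error */
}

int main(void)
{
	void *p = ptp_get_model();

	printf("%ld\n", IS_ERR(p) ? PTR_ERR(p) : 0L); /* -517 */
	first_block = ERR_PTR(-12);                   /* probe failed: -ENOMEM */
	printf("%ld\n", PTR_ERR(ptp_get_model()));    /* -12 */
	return 0;
}
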
 
 static const struct pci_device_id ptp_id_table[] = {
index 8dbc35c..73df2d5 100644 (file)
@@ -3252,7 +3252,7 @@ static int rvu_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        rvu->ptp = ptp_get();
        if (IS_ERR(rvu->ptp)) {
                err = PTR_ERR(rvu->ptp);
-               if (err == -EPROBE_DEFER)
+               if (err)
                        goto err_release_regions;
                rvu->ptp = NULL;
        }
index 0d745ae..c2f6867 100644 (file)
@@ -4069,21 +4069,14 @@ int rvu_mbox_handler_nix_set_rx_mode(struct rvu *rvu, struct nix_rx_mode *req,
        }
 
        /* install/uninstall promisc entry */
-       if (promisc) {
+       if (promisc)
                rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf,
                                              pfvf->rx_chan_base,
                                              pfvf->rx_chan_cnt);
-
-               if (rvu_npc_exact_has_match_table(rvu))
-                       rvu_npc_exact_promisc_enable(rvu, pcifunc);
-       } else {
+       else
                if (!nix_rx_multicast)
                        rvu_npc_enable_promisc_entry(rvu, pcifunc, nixlf, false);
 
-               if (rvu_npc_exact_has_match_table(rvu))
-                       rvu_npc_exact_promisc_disable(rvu, pcifunc);
-       }
-
        return 0;
 }
 
@@ -4277,9 +4270,10 @@ rx_frscfg:
        if (link < 0)
                return NIX_AF_ERR_RX_LINK_INVALID;
 
-       nix_find_link_frs(rvu, req, pcifunc);
 
 linkcfg:
+       nix_find_link_frs(rvu, req, pcifunc);
+
        cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link));
        cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16);
        if (req->update_minlen)
index 9f11c1e..7e20282 100644 (file)
@@ -218,13 +218,54 @@ void npc_config_secret_key(struct rvu *rvu, int blkaddr)
 
 void npc_program_mkex_hash(struct rvu *rvu, int blkaddr)
 {
+       struct npc_mcam_kex_hash *mh = rvu->kpu.mkex_hash;
        struct hw_cap *hwcap = &rvu->hw->cap;
+       u8 intf, ld, hdr_offset, byte_len;
        struct rvu_hwinfo *hw = rvu->hw;
-       u8 intf;
+       u64 cfg;
 
+       /* Check if hardware supports hash extraction */
        if (!hwcap->npc_hash_extract)
                return;
 
+       /* Check whether hashing should be enabled for the IPv6
+        * source/destination address.
+        * Hashing reduces the 128-bit SIP/DIP fields to 32 bits so that
+        * the 224-bit X2 key can be used for IPv6 based filters as well,
+        * which in turn makes more MCAM entries available for use.
+        *
+        * Hashing of IPv6 SIP/DIP is enabled in the below scenarios:
+        * 1. The silicon variant supports the hashing feature.
+        * 2. The number of bytes of the IP address being extracted is
+        *    4, i.e. 32 bits. The assumption here is that a user who
+        *    wants the 8 LSB bytes of the IP address, or the full
+        *    16 bytes, does not intend to use the 32-bit hash.
+        */
+       for (intf = 0; intf < hw->npc_intfs; intf++) {
+               for (ld = 0; ld < NPC_MAX_LD; ld++) {
+                       cfg = rvu_read64(rvu, blkaddr,
+                                        NPC_AF_INTFX_LIDX_LTX_LDX_CFG(intf,
+                                                                      NPC_LID_LC,
+                                                                      NPC_LT_LC_IP6,
+                                                                      ld));
+                       hdr_offset = FIELD_GET(NPC_HDR_OFFSET, cfg);
+                       byte_len = FIELD_GET(NPC_BYTESM, cfg);
+                       /* Hashing of the IPv6 source/destination address
+                        * should be enabled if
+                        * hdr_offset == 8 (offset of the source IPv6
+                        * address) or hdr_offset == 24 (offset of the
+                        * destination IPv6 address) and the number of
+                        * bytes to be extracted is 4. Per the hardware
+                        * configuration, byte_len is encoded as the
+                        * actual byte length - 1, hence byte_len is
+                        * checked against 3, not 4.
+                        */
+                       if ((hdr_offset == 8 || hdr_offset == 24) && byte_len == 3)
+                               mh->lid_lt_ld_hash_en[intf][NPC_LID_LC][NPC_LT_LC_IP6][ld] = true;
+               }
+       }
+
+       /* Update hash configuration if the field is hash enabled */
        for (intf = 0; intf < hw->npc_intfs; intf++) {
                npc_program_mkex_hash_rx(rvu, blkaddr, intf);
                npc_program_mkex_hash_tx(rvu, blkaddr, intf);
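
A standalone model of the LD-config decode added above. The shift values
below are assumptions for illustration only (the real NPC_HDR_OFFSET and
NPC_BYTESM field definitions live in the driver headers); only the
decision itself, hash IPv6 SIP/DIP when the offset is 8 or 24 and the
encoded length is 3 (i.e. 4 bytes), mirrors the code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MODEL_HDR_OFFSET_SHIFT	8	/* assumed position, illustrative */
#define MODEL_BYTESM_SHIFT	16	/* assumed position, illustrative */

static bool want_ip6_hash(uint64_t cfg)
{
	uint8_t hdr_offset = (cfg >> MODEL_HDR_OFFSET_SHIFT) & 0xff;
	uint8_t byte_len = (cfg >> MODEL_BYTESM_SHIFT) & 0xf;

	/* byte_len is encoded as actual length - 1, so 3 means 4 bytes. */
	return (hdr_offset == 8 || hdr_offset == 24) && byte_len == 3;
}

int main(void)
{
	uint64_t cfg = (8ULL << MODEL_HDR_OFFSET_SHIFT) |
		       (3ULL << MODEL_BYTESM_SHIFT);

	printf("hash ipv6: %d\n", want_ip6_hash(cfg));
	return 0;
}
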
@@ -1164,8 +1205,10 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i
 {
        struct npc_exact_table *table;
        u16 *cnt, old_cnt;
+       bool promisc;
 
        table = rvu->hw->table;
+       promisc = table->promisc_mode[drop_mcam_idx];
 
        cnt = &table->cnt_cmd_rules[drop_mcam_idx];
        old_cnt = *cnt;
@@ -1177,13 +1220,18 @@ static u16 __rvu_npc_exact_cmd_rules_cnt_update(struct rvu *rvu, int drop_mcam_i
 
        *enable_or_disable_cam = false;
 
-       /* If all rules are deleted, disable cam */
+       if (promisc)
+               goto done;
+
+       /* If all rules are deleted and we are not already in promisc
+        * mode, disable the CAM.
+        */
        if (!*cnt && val < 0) {
                *enable_or_disable_cam = true;
                goto done;
        }
 
-       /* If rule got added, enable cam */
+       /* If a rule got added and not already in promisc mode, enable the CAM */
        if (!old_cnt && val > 0) {
                *enable_or_disable_cam = true;
                goto done;
@@ -1462,6 +1510,12 @@ int rvu_npc_exact_promisc_disable(struct rvu *rvu, u16 pcifunc)
        *promisc = false;
        mutex_unlock(&table->lock);
 
+       /* Enable drop rule */
+       rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX,
+                                          true);
+
+       dev_dbg(rvu->dev, "%s: Disabled promisc mode (cgx=%d lmac=%d)\n",
+               __func__, cgx_id, lmac_id);
        return 0;
 }
 
@@ -1503,6 +1557,12 @@ int rvu_npc_exact_promisc_enable(struct rvu *rvu, u16 pcifunc)
        *promisc = true;
        mutex_unlock(&table->lock);
 
+       /* Disable drop rule */
+       rvu_npc_enable_mcam_by_entry_index(rvu, drop_mcam_idx, NIX_INTF_RX,
+                                          false);
+
+       dev_dbg(rvu->dev, "%s: Enabled promisc mode (cgx=%d lmac=%d)\n",
+               __func__, cgx_id, lmac_id);
        return 0;
 }
 
index a1c3d98..57a0932 100644 (file)
@@ -70,8 +70,8 @@ static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = {
        [NIX_INTF_RX] = {
                [NPC_LID_LC] = {
                        [NPC_LT_LC_IP6] = {
-                               true,
-                               true,
+                               false,
+                               false,
                        },
                },
        },
@@ -79,8 +79,8 @@ static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = {
        [NIX_INTF_TX] = {
                [NPC_LID_LC] = {
                        [NPC_LT_LC_IP6] = {
-                               true,
-                               true,
+                               false,
+                               false,
                        },
                },
        },
index 6e2fb24..59b1382 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2022 Marvell.
  */
 
+#include <crypto/skcipher.h>
 #include <linux/rtnetlink.h>
 #include <linux/bitfield.h>
 #include "otx2_common.h"
 #define MCS_TCI_E                      0x08 /* encryption */
 #define MCS_TCI_C                      0x04 /* changed text */
 
+#define CN10K_MAX_HASH_LEN             16
+#define CN10K_MAX_SAK_LEN              32
+
+static int cn10k_ecb_aes_encrypt(struct otx2_nic *pfvf, u8 *sak,
+                                u16 sak_len, u8 *hash)
+{
+       u8 data[CN10K_MAX_HASH_LEN] = { 0 };
+       struct skcipher_request *req = NULL;
+       struct scatterlist sg_src, sg_dst;
+       struct crypto_skcipher *tfm;
+       DECLARE_CRYPTO_WAIT(wait);
+       int err;
+
+       tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
+       if (IS_ERR(tfm)) {
+               dev_err(pfvf->dev, "failed to allocate transform for ecb-aes\n");
+               return PTR_ERR(tfm);
+       }
+
+       req = skcipher_request_alloc(tfm, GFP_KERNEL);
+       if (!req) {
+               dev_err(pfvf->dev, "failed to allocate request for skcipher\n");
+               err = -ENOMEM;
+               goto free_tfm;
+       }
+
+       err = crypto_skcipher_setkey(tfm, sak, sak_len);
+       if (err) {
+               dev_err(pfvf->dev, "failed to set key for skcipher\n");
+               goto free_req;
+       }
+
+       /* build sg list */
+       sg_init_one(&sg_src, data, CN10K_MAX_HASH_LEN);
+       sg_init_one(&sg_dst, hash, CN10K_MAX_HASH_LEN);
+
+       skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
+       skcipher_request_set_crypt(req, &sg_src, &sg_dst,
+                                  CN10K_MAX_HASH_LEN, NULL);
+
+       err = crypto_skcipher_encrypt(req);
+       err = crypto_wait_req(err, &wait);
+
+free_req:
+       skcipher_request_free(req);
+free_tfm:
+       crypto_free_skcipher(tfm);
+       return err;
+}
+
 static struct cn10k_mcs_txsc *cn10k_mcs_get_txsc(struct cn10k_mcs_cfg *cfg,
                                                 struct macsec_secy *secy)
 {
@@ -330,19 +381,53 @@ fail:
        return ret;
 }
 
+static int cn10k_mcs_write_keys(struct otx2_nic *pfvf,
+                               struct macsec_secy *secy,
+                               struct mcs_sa_plcy_write_req *req,
+                               u8 *sak, u8 *salt, ssci_t ssci)
+{
+       u8 hash_rev[CN10K_MAX_HASH_LEN];
+       u8 sak_rev[CN10K_MAX_SAK_LEN];
+       u8 salt_rev[MACSEC_SALT_LEN];
+       u8 hash[CN10K_MAX_HASH_LEN];
+       u32 ssci_63_32;
+       int err, i;
+
+       err = cn10k_ecb_aes_encrypt(pfvf, sak, secy->key_len, hash);
+       if (err) {
+               dev_err(pfvf->dev, "Generating hash using ECB(AES) failed\n");
+               return err;
+       }
+
+       for (i = 0; i < secy->key_len; i++)
+               sak_rev[i] = sak[secy->key_len - 1 - i];
+
+       for (i = 0; i < CN10K_MAX_HASH_LEN; i++)
+               hash_rev[i] = hash[CN10K_MAX_HASH_LEN - 1 - i];
+
+       for (i = 0; i < MACSEC_SALT_LEN; i++)
+               salt_rev[i] = salt[MACSEC_SALT_LEN - 1 - i];
+
+       ssci_63_32 = (__force u32)cpu_to_be32((__force u32)ssci);
+
+       memcpy(&req->plcy[0][0], sak_rev, secy->key_len);
+       memcpy(&req->plcy[0][4], hash_rev, CN10K_MAX_HASH_LEN);
+       memcpy(&req->plcy[0][6], salt_rev, MACSEC_SALT_LEN);
+       req->plcy[0][7] |= (u64)ssci_63_32 << 32;
+
+       return 0;
+}
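
A minimal sketch of the byte-order massaging cn10k_mcs_write_keys() does
above: the hardware expects the SAK, its ECB(AES) hash and the XPN salt
byte-reversed relative to how the MACsec stack supplies them. The helper
is generic and the 16-byte size is illustrative:

#include <stdint.h>
#include <stdio.h>

/* Reverse src into dst, as done for sak_rev/hash_rev/salt_rev above. */
static void reverse_bytes(uint8_t *dst, const uint8_t *src, size_t len)
{
	for (size_t i = 0; i < len; i++)
		dst[i] = src[len - 1 - i];
}

int main(void)
{
	uint8_t sak[16], sak_rev[16];
	int i;

	for (i = 0; i < 16; i++)
		sak[i] = (uint8_t)i;
	reverse_bytes(sak_rev, sak, sizeof(sak));
	printf("first byte after reversal: %u\n", sak_rev[0]); /* 15 */
	return 0;
}
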
+
 static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf,
                                      struct macsec_secy *secy,
                                      struct cn10k_mcs_rxsc *rxsc,
                                      u8 assoc_num, bool sa_in_use)
 {
-       unsigned char *src = rxsc->sa_key[assoc_num];
        struct mcs_sa_plcy_write_req *plcy_req;
-       u8 *salt_p = rxsc->salt[assoc_num];
+       u8 *sak = rxsc->sa_key[assoc_num];
+       u8 *salt = rxsc->salt[assoc_num];
        struct mcs_rx_sc_sa_map *map_req;
        struct mbox *mbox = &pfvf->mbox;
-       u64 ssci_salt_95_64 = 0;
-       u8 reg, key_len;
-       u64 salt_63_0;
        int ret;
 
        mutex_lock(&mbox->lock);
@@ -360,20 +445,10 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf,
                goto fail;
        }
 
-       for (reg = 0, key_len = 0; key_len < secy->key_len; key_len += 8) {
-               memcpy((u8 *)&plcy_req->plcy[0][reg],
-                      (src + reg * 8), 8);
-               reg++;
-       }
-
-       if (secy->xpn) {
-               memcpy((u8 *)&salt_63_0, salt_p, 8);
-               memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4);
-               ssci_salt_95_64 |= (__force u64)rxsc->ssci[assoc_num] << 32;
-
-               plcy_req->plcy[0][6] = salt_63_0;
-               plcy_req->plcy[0][7] = ssci_salt_95_64;
-       }
+       ret = cn10k_mcs_write_keys(pfvf, secy, plcy_req, sak,
+                                  salt, rxsc->ssci[assoc_num]);
+       if (ret)
+               goto fail;
 
        plcy_req->sa_index[0] = rxsc->hw_sa_id[assoc_num];
        plcy_req->sa_cnt = 1;
@@ -586,13 +661,10 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf,
                                      struct cn10k_mcs_txsc *txsc,
                                      u8 assoc_num)
 {
-       unsigned char *src = txsc->sa_key[assoc_num];
        struct mcs_sa_plcy_write_req *plcy_req;
-       u8 *salt_p = txsc->salt[assoc_num];
+       u8 *sak = txsc->sa_key[assoc_num];
+       u8 *salt = txsc->salt[assoc_num];
        struct mbox *mbox = &pfvf->mbox;
-       u64 ssci_salt_95_64 = 0;
-       u8 reg, key_len;
-       u64 salt_63_0;
        int ret;
 
        mutex_lock(&mbox->lock);
@@ -603,19 +675,10 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf,
                goto fail;
        }
 
-       for (reg = 0, key_len = 0; key_len < secy->key_len; key_len += 8) {
-               memcpy((u8 *)&plcy_req->plcy[0][reg], (src + reg * 8), 8);
-               reg++;
-       }
-
-       if (secy->xpn) {
-               memcpy((u8 *)&salt_63_0, salt_p, 8);
-               memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4);
-               ssci_salt_95_64 |= (__force u64)txsc->ssci[assoc_num] << 32;
-
-               plcy_req->plcy[0][6] = salt_63_0;
-               plcy_req->plcy[0][7] = ssci_salt_95_64;
-       }
+       ret = cn10k_mcs_write_keys(pfvf, secy, plcy_req, sak,
+                                  salt, txsc->ssci[assoc_num]);
+       if (ret)
+               goto fail;
 
        plcy_req->plcy[0][8] = assoc_num;
        plcy_req->sa_index[0] = txsc->hw_sa_id[assoc_num];
index 10e1126..2d7713a 100644 (file)
@@ -872,6 +872,14 @@ static int otx2_prepare_flow_request(struct ethtool_rx_flow_spec *fsp,
                                return -EINVAL;
 
                        vlan_etype = be16_to_cpu(fsp->h_ext.vlan_etype);
+
+                       /* A drop rule with vlan_etype == 802.1Q
+                        * and vlan_id == 0 is not supported.
+                        */
+                       if (vlan_etype == ETH_P_8021Q && !fsp->m_ext.vlan_tci &&
+                           fsp->ring_cookie == RX_CLS_FLOW_DISC)
+                               return -EINVAL;
+
                        /* Only ETH_P_8021Q and ETH_P_802AD types supported */
                        if (vlan_etype != ETH_P_8021Q &&
                            vlan_etype != ETH_P_8021AD)
index fe8ea4e..9551b42 100644 (file)
@@ -1454,8 +1454,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
        if (err)
                goto err_free_npa_lf;
 
-       /* Enable backpressure */
-       otx2_nix_config_bp(pf, true);
+       /* Enable backpressure for CGX mapped PF/VFs */
+       if (!is_otx2_lbkvf(pf->pdev))
+               otx2_nix_config_bp(pf, true);
 
        /* Init Auras and pools used by NIX RQ, for free buffer ptrs */
        err = otx2_rq_aura_pool_init(pf);
index 8a13df5..5e56b6c 100644 (file)
@@ -597,6 +597,21 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
                        return -EOPNOTSUPP;
                }
 
+               if (!match.mask->vlan_id) {
+                       struct flow_action_entry *act;
+                       int i;
+
+                       flow_action_for_each(i, act, &rule->action) {
+                               if (act->id == FLOW_ACTION_DROP) {
+                                       netdev_err(nic->netdev,
+                                                  "vlan tpid 0x%x with vlan_id %d is not supported for DROP rule.\n",
+                                                  ntohs(match.key->vlan_tpid),
+                                                  match.key->vlan_id);
+                                       return -EOPNOTSUPP;
+                               }
+                       }
+               }
+
                if (match.mask->vlan_id ||
                    match.mask->vlan_dei ||
                    match.mask->vlan_priority) {
index f328d95..35857dc 100644 (file)
@@ -727,7 +727,8 @@ pick_fw_ver:
 
        err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev);
        if (err) {
-               if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) {
+               if (ver_maj != PRESTERA_PREV_FW_MAJ_VER ||
+                   ver_min != PRESTERA_PREV_FW_MIN_VER) {
                        ver_maj = PRESTERA_PREV_FW_MAJ_VER;
                        ver_min = PRESTERA_PREV_FW_MIN_VER;
 
index a9a1028..de31717 100644 (file)
@@ -166,11 +166,11 @@ prestera_util_neigh2nc_key(struct prestera_switch *sw, struct neighbour *n,
 
 static bool __prestera_fi_is_direct(struct fib_info *fi)
 {
-       struct fib_nh *fib_nh;
+       struct fib_nh_common *fib_nhc;
 
        if (fib_info_num_path(fi) == 1) {
-               fib_nh = fib_info_nh(fi, 0);
-               if (fib_nh->fib_nh_gw_family == AF_UNSPEC)
+               fib_nhc = fib_info_nhc(fi, 0);
+               if (fib_nhc->nhc_gw_family == AF_UNSPEC)
                        return true;
        }
 
@@ -261,7 +261,7 @@ static bool
 __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
                                       struct net_device *dev)
 {
-       struct fib_nh *fib_nh;
+       struct fib_nh_common *fib_nhc;
        struct fib_result res;
        bool reachable;
 
@@ -269,8 +269,8 @@ __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
 
        if (!prestera_util_kern_get_route(&res, tb_id, addr))
                if (prestera_fi_is_direct(res.fi)) {
-                       fib_nh = fib_info_nh(res.fi, 0);
-                       if (dev == fib_nh->fib_nh_dev)
+                       fib_nhc = fib_info_nhc(res.fi, 0);
+                       if (dev == fib_nhc->nhc_dev)
                                reachable = true;
                }
 
@@ -324,7 +324,7 @@ prestera_kern_fib_info_nhc(struct fib_notifier_info *info, int n)
        if (info->family == AF_INET) {
                fen4_info = container_of(info, struct fib_entry_notifier_info,
                                         info);
-               return &fib_info_nh(fen4_info->fi, n)->nh_common;
+               return fib_info_nhc(fen4_info->fi, n);
        } else if (info->family == AF_INET6) {
                fen6_info = container_of(info, struct fib6_entry_notifier_info,
                                         info);
index 834c644..2d15342 100644 (file)
@@ -3846,23 +3846,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
        return 0;
 }
 
-static int __init mtk_init(struct net_device *dev)
-{
-       struct mtk_mac *mac = netdev_priv(dev);
-       struct mtk_eth *eth = mac->hw;
-       int ret;
-
-       ret = of_get_ethdev_address(mac->of_node, dev);
-       if (ret) {
-               /* If the mac address is invalid, use random mac address */
-               eth_hw_addr_random(dev);
-               dev_err(eth->dev, "generated random MAC address %pM\n",
-                       dev->dev_addr);
-       }
-
-       return 0;
-}
-
 static void mtk_uninit(struct net_device *dev)
 {
        struct mtk_mac *mac = netdev_priv(dev);
@@ -4278,7 +4261,6 @@ static const struct ethtool_ops mtk_ethtool_ops = {
 };
 
 static const struct net_device_ops mtk_netdev_ops = {
-       .ndo_init               = mtk_init,
        .ndo_uninit             = mtk_uninit,
        .ndo_open               = mtk_open,
        .ndo_stop               = mtk_stop,
@@ -4340,6 +4322,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
        mac->hw = eth;
        mac->of_node = np;
 
+       err = of_get_ethdev_address(mac->of_node, eth->netdev[id]);
+       if (err == -EPROBE_DEFER)
+               return err;
+
+       if (err) {
+               /* If the mac address is invalid, use random mac address */
+               eth_hw_addr_random(eth->netdev[id]);
+               dev_err(eth->dev, "generated random MAC address %pM\n",
+                       eth->netdev[id]->dev_addr);
+       }
+
        memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
        mac->hwlro_ip_cnt = 0;
 
index 316fe2e..1a97fec 100644 (file)
@@ -98,7 +98,7 @@ mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
 
                acct = mtk_foe_entry_get_mib(ppe, i, NULL);
 
-               type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+               type = mtk_get_ib1_pkt_type(ppe->eth, entry->ib1);
                seq_printf(m, "%05x %s %7s", i,
                           mtk_foe_entry_state_str(state),
                           mtk_foe_pkt_type_str(type));
index 985cff9..3b651ef 100644 (file)
@@ -221,9 +221,13 @@ void mtk_wed_fe_reset(void)
 
        for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
                struct mtk_wed_hw *hw = hw_list[i];
-               struct mtk_wed_device *dev = hw->wed_dev;
+               struct mtk_wed_device *dev;
                int err;
 
+               if (!hw)
+                       break;
+
+               dev = hw->wed_dev;
                if (!dev || !dev->wlan.reset)
                        continue;
 
@@ -244,8 +248,12 @@ void mtk_wed_fe_reset_complete(void)
 
        for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
                struct mtk_wed_hw *hw = hw_list[i];
-               struct mtk_wed_device *dev = hw->wed_dev;
+               struct mtk_wed_device *dev;
+
+               if (!hw)
+                       break;
 
+               dev = hw->wed_dev;
                if (!dev || !dev->wlan.reset_complete)
                        continue;
 
index b012833..e869c65 100644 (file)
@@ -2,6 +2,7 @@
 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
 
 #include "reporter_vnic.h"
+#include "en_stats.h"
 #include "devlink.h"
 
 #define VNIC_ENV_GET64(vnic_env_stats, c) \
@@ -36,55 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
        if (err)
                return err;
 
-       err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
-                                       VNIC_ENV_GET64(&vnic, total_error_queues));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
-                                       VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
-                                       VNIC_ENV_GET64(&vnic, comp_eq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
-                                       VNIC_ENV_GET64(&vnic, async_eq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
-                                       VNIC_ENV_GET64(&vnic, cq_overrun));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
-                                       VNIC_ENV_GET64(&vnic, invalid_command));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
-                                       VNIC_ENV_GET64(&vnic, quota_exceeded_command));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
-                                       VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
-                                       VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
-       if (err)
-               return err;
-
-       err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
-                                       VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
-       if (err)
-               return err;
+       if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
+                                               VNIC_ENV_GET(&vnic, total_error_queues));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow",
+                                               VNIC_ENV_GET(&vnic,
+                                                            send_queue_priority_update_flow));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, eq_overrun_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun",
+                                               VNIC_ENV_GET(&vnic, comp_eq_overrun));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun",
+                                               VNIC_ENV_GET(&vnic, async_eq_overrun));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun",
+                                               VNIC_ENV_GET(&vnic, cq_overrun));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, invalid_command_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command",
+                                               VNIC_ENV_GET(&vnic, invalid_command));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, quota_exceeded_count)) {
+               err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command",
+                                               VNIC_ENV_GET(&vnic, quota_exceeded_command));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) {
+               err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+                                               VNIC_ENV_GET64(&vnic,
+                                                              nic_receive_steering_discard));
+               if (err)
+                       return err;
+       }
+
+       if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) {
+               err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
+                                               VNIC_ENV_GET64(&vnic,
+                                                              generated_pkt_steering_fail));
+               if (err)
+                       return err;
+
+               err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
+                                               VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
+               if (err)
+                       return err;
+       }
 
        err = devlink_fmsg_obj_nest_end(fmsg);
        if (err)
index 03cb79a..be83ad9 100644 (file)
@@ -594,7 +594,7 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
 
        err = fs_any_create_table(fs);
        if (err)
-               return err;
+               goto err_free_any;
 
        err = fs_any_enable(fs);
        if (err)
@@ -606,8 +606,8 @@ int mlx5e_fs_tt_redirect_any_create(struct mlx5e_flow_steering *fs)
 
 err_destroy_table:
        fs_any_destroy_table(fs_any);
-
-       kfree(fs_any);
+err_free_any:
        mlx5e_fs_set_any(fs, NULL);
+       kfree(fs_any);
        return err;
 }
index 3cbebfb..b0b429a 100644 (file)
@@ -729,8 +729,10 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params,
 
        c = kvzalloc_node(sizeof(*c), GFP_KERNEL, dev_to_node(mlx5_core_dma_dev(mdev)));
        cparams = kvzalloc(sizeof(*cparams), GFP_KERNEL);
-       if (!c || !cparams)
-               return -ENOMEM;
+       if (!c || !cparams) {
+               err = -ENOMEM;
+               goto err_free;
+       }
 
        c->priv     = priv;
        c->mdev     = priv->mdev;
index a254e72..fadfa8b 100644 (file)
@@ -1545,7 +1545,8 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
 
        attr->ct_attr.ct_action |= act->ct.action; /* So we can have clear + ct */
        attr->ct_attr.zone = act->ct.zone;
-       attr->ct_attr.nf_ft = act->ct.flow_table;
+       if (!(act->ct.action & TCA_CT_ACT_CLEAR))
+               attr->ct_attr.nf_ft = act->ct.flow_table;
        attr->ct_attr.act_miss_cookie = act->miss_cookie;
 
        return 0;
@@ -1990,6 +1991,9 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att
        if (!priv)
                return -EOPNOTSUPP;
 
+       if (attr->ct_attr.offloaded)
+               return 0;
+
        if (attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR) {
                err = mlx5_tc_ct_entry_set_registers(priv, &attr->parse_attr->mod_hdr_acts,
                                                     0, 0, 0, 0);
@@ -1999,11 +2003,15 @@ mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *att
                attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        }
 
-       if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
+       if (!attr->ct_attr.nf_ft) { /* means only ct clear action, and not ct_clear,ct() */
+               attr->ct_attr.offloaded = true;
                return 0;
+       }
 
        mutex_lock(&priv->control_lock);
        err = __mlx5_tc_ct_flow_offload(priv, attr);
+       if (!err)
+               attr->ct_attr.offloaded = true;
        mutex_unlock(&priv->control_lock);
 
        return err;
@@ -2021,7 +2029,7 @@ void
 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
                       struct mlx5_flow_attr *attr)
 {
-       if (!attr->ct_attr.ft) /* no ct action, return */
+       if (!attr->ct_attr.offloaded) /* no ct action, return */
                return;
        if (!attr->ct_attr.nf_ft) /* means only ct clear action, and not ct_clear,ct() */
                return;
index 8e9316f..b66c5f9 100644 (file)
@@ -29,6 +29,7 @@ struct mlx5_ct_attr {
        u32 ct_labels_id;
        u32 act_miss_mapping;
        u64 act_miss_cookie;
+       bool offloaded;
        struct mlx5_ct_ft *ft;
 };
 
index f0c3464..1730f6a 100644 (file)
@@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
        int out_index;
        int err = 0;
 
-       if (!mlx5e_is_eswitch_flow(flow))
-               return 0;
-
        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;
        *vf_tun = false;
@@ -1464,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
                attr = mlx5e_tc_get_encap_attr(flow);
                esw_attr = attr->esw_attr;
 
-               if (flow_flag_test(flow, SLOW))
+               if (flow_flag_test(flow, SLOW)) {
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
-               else
+               } else {
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+                       mlx5e_tc_unoffload_flow_post_acts(flow);
+               }
 
                mlx5e_tc_detach_mod_hdr(priv, flow, attr);
                attr->modify_hdr = NULL;
index f0e6095..40589ce 100644 (file)
@@ -662,8 +662,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
                                /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
                                 * as we know this is a page_pool page.
                                 */
-                               page_pool_put_defragged_page(page->pp,
-                                                            page, -1, true);
+                               page_pool_recycle_direct(page->pp, page);
                        } while (++n < num);
 
                        break;
index 9e8e618..ecfe93a 100644 (file)
@@ -84,6 +84,8 @@ enum mlx5e_xdp_xmit_mode {
  * MLX5E_XDP_XMIT_MODE_XSK:
  *    none.
  */
+#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4
+
 union mlx5e_xdp_info {
        enum mlx5e_xdp_xmit_mode mode;
        union {
index d97e6df..b8dd744 100644 (file)
@@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
        net_prefetch(mxbuf->xdp.data);
 
        prog = rcu_dereference(rq->xdp_prog);
-       if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
+       if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
+               if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+                       wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
                return NULL; /* page/packet was consumed by XDP */
+       }
 
        /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
         * will be handled by mlx5e_free_rx_wqe.
index 88a5aed..c7d191f 100644 (file)
@@ -190,6 +190,7 @@ static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
        in = kvzalloc(inlen, GFP_KERNEL);
        if  (!in || !ft->g) {
                kfree(ft->g);
+               ft->g = NULL;
                kvfree(in);
                return -ENOMEM;
        }
index dbe87bf..832d36b 100644 (file)
@@ -808,9 +808,9 @@ static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upsp
        }
 
        if (upspec->sport) {
-               MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport,
+               MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_sport,
                         upspec->sport_mask);
-               MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport);
+               MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_sport, upspec->sport);
        }
 }
 
index eab5bc7..8d995e3 100644 (file)
@@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
 
        trailer_len = alen + plen + 2;
 
-       pskb_trim(skb, skb->len - trailer_len);
+       ret = pskb_trim(skb, skb->len - trailer_len);
+       if (unlikely(ret))
+               return ret;
        if (skb->protocol == htons(ETH_P_IP)) {
                ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
                ip_send_check(ipv4hdr);
index cf704f1..984fa04 100644 (file)
@@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls,
 
 int mlx5e_ktls_init(struct mlx5e_priv *priv)
 {
-       struct mlx5_crypto_dek_pool *dek_pool;
        struct mlx5e_tls *tls;
 
        if (!mlx5e_is_ktls_device(priv->mdev))
@@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv)
                return -ENOMEM;
        tls->mdev = priv->mdev;
 
-       dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY);
-       if (IS_ERR(dek_pool)) {
-               kfree(tls);
-               return PTR_ERR(dek_pool);
-       }
-       tls->dek_pool = dek_pool;
        priv->tls = tls;
 
        mlx5e_tls_debugfs_init(tls, priv->dfs_root);
@@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
        debugfs_remove_recursive(tls->debugfs.dfs);
        tls->debugfs.dfs = NULL;
 
-       mlx5_crypto_dek_pool_destroy(tls->dek_pool);
        kfree(priv->tls);
        priv->tls = NULL;
 }
index efb2cf7..d61be26 100644 (file)
@@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls,
 
 int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
 {
+       struct mlx5_crypto_dek_pool *dek_pool;
        struct mlx5e_tls *tls = priv->tls;
+       int err;
+
+       if (!mlx5e_is_ktls_device(priv->mdev))
+               return 0;
+
+       /* The DEK pool may be used by TX, RX, or both, but the creation has
+        * to live here to avoid a firmware syndrome during devlink reload.
+        */
+       dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY);
+       if (IS_ERR(dek_pool))
+               return PTR_ERR(dek_pool);
+       tls->dek_pool = dek_pool;
 
        if (!mlx5e_is_ktls_tx(priv->mdev))
                return 0;
 
        priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
-       if (!priv->tls->tx_pool)
-               return -ENOMEM;
+       if (!priv->tls->tx_pool) {
+               err = -ENOMEM;
+               goto err_tx_pool_init;
+       }
 
        mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs);
 
        return 0;
+
+err_tx_pool_init:
+       mlx5_crypto_dek_pool_destroy(dek_pool);
+       return err;
 }
 
 void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
 {
        if (!mlx5e_is_ktls_tx(priv->mdev))
-               return;
+               goto dek_pool_destroy;
 
        debugfs_remove_recursive(priv->tls->debugfs.dfs_tx);
        priv->tls->debugfs.dfs_tx = NULL;
 
        mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
        priv->tls->tx_pool = NULL;
+
+dek_pool_destroy:
+       if (mlx5e_is_ktls_device(priv->mdev))
+               mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool);
 }
index 7fc901a..414e285 100644 (file)
@@ -161,6 +161,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
 
        if (!in) {
                kfree(ft->g);
+               ft->g = NULL;
                return -ENOMEM;
        }
 
index 933a777..5aa51d7 100644 (file)
@@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs);
 
 int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
 {
+       /* Moving to switchdev mode, fs->arfs is freed by the mlx5e_nic_profile
+        * cleanup_rx callback and is not recreated when mlx5e_uplink_rep_profile
+        * is loaded, as mlx5e_create_flow_steering() is not called by the
+        * uplink_rep profile's init_rx callback. Thus, if ntuple is set, the
+        * move to switchdev will enter this function with fs->arfs nullified.
+        */
+       if (!mlx5e_fs_get_arfs(fs))
+               return 0;
+
        arfs_del_rules(fs);
 
        return arfs_disable(fs);
index defb1ef..f7b4941 100644 (file)
@@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s
        return err;
 }
 
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq)
+{
+       struct mlx5_cqwq *cqwq = &rq->cq.wq;
+       struct mlx5_cqe64 *cqe;
+
+       if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) {
+               while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)))
+                       mlx5_cqwq_pop(cqwq);
+       } else {
+               while ((cqe = mlx5_cqwq_get_cqe(cqwq)))
+                       mlx5_cqwq_pop(cqwq);
+       }
+
+       mlx5_cqwq_update_db_record(cqwq);
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
 {
        struct net_device *dev = rq->netdev;
        int err;
@@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
                netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
                return err;
        }
+
+       mlx5e_free_rx_descs(rq);
+       mlx5e_flush_rq_cq(rq);
+
        err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
        if (err) {
                netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
@@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
        return 0;
 }
 
-int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
-{
-       mlx5e_free_rx_descs(rq);
-
-       return mlx5e_rq_to_ready(rq, curr_state);
-}
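
A userspace model of the drain that mlx5e_flush_rq_cq() introduces above:
every completion still queued in the CQ ring is popped before the RQ goes
back to RDY, with a single doorbell update at the end, so stale CQEs
cannot alias completions of the freshly posted descriptors. The ring
representation here is illustrative:

#include <stdio.h>

struct model_cq {
	int head, tail; /* [head, tail) holds unconsumed CQEs */
};

static int cq_drain(struct model_cq *cq)
{
	int popped = 0;

	/* "get_cqe" returning non-NULL, then the mlx5_cqwq_pop() step. */
	while (cq->head != cq->tail) {
		cq->head++;
		popped++;
	}
	/* One doorbell-record update after the drain, as in the driver. */
	return popped;
}

int main(void)
{
	struct model_cq cq = { .head = 0, .tail = 5 };

	printf("drained %d stale CQEs\n", cq_drain(&cq));
	return 0;
}
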
-
 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 {
        struct mlx5_core_dev *mdev = rq->mdev;
@@ -1285,11 +1298,13 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
 {
        struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
        int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
-       int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of
-                                                          * entries of all xmit_modes.
-                                                          */
+       int entries;
        size_t size;
 
+       /* upper bound for maximum num of entries of all xmit_modes. */
+       entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS *
+                                    MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO);
+
        size = array_size(sizeof(*xdpi_fifo->xi), entries);
        xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
        if (!xdpi_fifo->xi)
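
Rounding the FIFO depth up to a power of two lets free-running producer/consumer counters be mapped to slots with a cheap bitmask instead of a modulo. A userspace check of that property (the kernel's roundup_pow_of_two() is re-implemented locally for the sketch):

#include <assert.h>
#include <stdint.h>

/* Round v up to the next power of two (v > 0, 32-bit). */
static uint32_t roundup_pow_of_two32(uint32_t v)
{
        v--;
        v |= v >> 1; v |= v >> 2; v |= v >> 4;
        v |= v >> 8; v |= v >> 16;
        return v + 1;
}

int main(void)
{
        uint32_t entries = roundup_pow_of_two32(48 * 4); /* e.g. wq_sz * ratio */
        uint32_t mask = entries - 1;
        uint32_t pc = 1000003;  /* free-running producer counter */

        assert(entries == 256);              /* 192 rounded up */
        assert((pc & mask) == pc % entries); /* mask == modulo */
        return 0;
}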
@@ -5253,6 +5268,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
 static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                          struct net_device *netdev)
 {
+       const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED;
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5e_flow_steering *fs;
        int err;
@@ -5281,9 +5297,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
                mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
        mlx5e_health_create_reporters(priv);
+
+       /* If netdev is already registered (e.g. move from uplink to nic profile),
+        * RTNL lock must be held before triggering netdev notifiers.
+        */
+       if (take_rtnl)
+               rtnl_lock();
+
        /* update XDP supported features */
        mlx5e_set_xdp_feature(netdev);
 
+       if (take_rtnl)
+               rtnl_unlock();
+
        return 0;
 }
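
Latching take_rtnl once, up front, keeps the lock/unlock pair symmetric even if the registration state could change in between. A sketch of the conditional-lock shape with a pthread mutex standing in for rtnl_lock()/rtnl_unlock() (illustrative only, not the kernel's locking rules):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER;

/* Trigger notifiers; lock only when the device is already visible. */
static void update_features(bool registered)
{
        bool take_lock = registered; /* decided once, used for both ends */

        if (take_lock)
                pthread_mutex_lock(&rtnl);

        /* ... notifier chain would run here ... */

        if (take_lock)
                pthread_mutex_unlock(&rtnl);
}

int main(void)
{
        update_features(false); /* probe path: no lock needed */
        update_features(true);  /* profile change: lock taken */
        return 0;
}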
 
index 152b621..99b3843 100644 (file)
@@ -1012,7 +1012,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
        err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
        if (err) {
                mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
-               return err;
+               goto err_rx_res_free;
        }
 
        err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
@@ -1046,6 +1046,7 @@ err_destroy_rx_res:
        mlx5e_rx_res_destroy(priv->rx_res);
 err_close_drop_rq:
        mlx5e_close_drop_rq(&priv->drop_rq);
+err_rx_res_free:
        mlx5e_rx_res_free(priv->rx_res);
        priv->rx_res = NULL;
 err_free_fs:
@@ -1159,6 +1160,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
                return err;
        }
 
+       err = mlx5e_rep_neigh_init(rpriv);
+       if (err)
+               goto err_neigh_init;
+
        if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
                err = mlx5e_init_uplink_rep_tx(rpriv);
                if (err)
@@ -1175,6 +1180,8 @@ err_ht_init:
        if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
                mlx5e_cleanup_uplink_rep_tx(rpriv);
 err_init_tx:
+       mlx5e_rep_neigh_cleanup(rpriv);
+err_neigh_init:
        mlx5e_destroy_tises(priv);
        return err;
 }
@@ -1188,22 +1195,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
        if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
                mlx5e_cleanup_uplink_rep_tx(rpriv);
 
+       mlx5e_rep_neigh_cleanup(rpriv);
        mlx5e_destroy_tises(priv);
 }
 
 static void mlx5e_rep_enable(struct mlx5e_priv *priv)
 {
-       struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
        mlx5e_set_netdev_mtu_boundaries(priv);
-       mlx5e_rep_neigh_init(rpriv);
 }
 
 static void mlx5e_rep_disable(struct mlx5e_priv *priv)
 {
-       struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
-       mlx5e_rep_neigh_cleanup(rpriv);
 }
 
 static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
@@ -1253,7 +1255,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
 
 static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 {
-       struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct net_device *netdev = priv->netdev;
        struct mlx5_core_dev *mdev = priv->mdev;
        u16 max_mtu;
@@ -1275,7 +1276,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
        mlx5_notifier_register(mdev, &priv->events_nb);
        mlx5e_dcbnl_initialize(priv);
        mlx5e_dcbnl_init_app(priv);
-       mlx5e_rep_neigh_init(rpriv);
        mlx5e_rep_bridge_init(priv);
 
        netdev->wanted_features |= NETIF_F_HW_TC;
@@ -1290,7 +1290,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 
 static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 {
-       struct mlx5e_rep_priv *rpriv = priv->ppriv;
        struct mlx5_core_dev *mdev = priv->mdev;
 
        rtnl_lock();
@@ -1300,7 +1299,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
        rtnl_unlock();
 
        mlx5e_rep_bridge_cleanup(priv);
-       mlx5e_rep_neigh_cleanup(rpriv);
        mlx5e_dcbnl_delete_app(priv);
        mlx5_notifier_unregister(mdev, &priv->events_nb);
        mlx5e_rep_tc_disable(priv);
index 704b022..41d3715 100644 (file)
@@ -390,10 +390,18 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 {
        struct mlx5e_wqe_frag_info *wi = get_frag(rq, ix);
 
-       if (rq->xsk_pool)
+       if (rq->xsk_pool) {
                mlx5e_xsk_free_rx_wqe(wi);
-       else
+       } else {
                mlx5e_free_rx_wqe(rq, wi);
+
+               /* Avoid a second release of the wqe pages: dealloc is called
+                * for the same missing wqes on regular RQ flush and on regular
+                * RQ close. This happens when XSK RQs come into play.
+                */
+               for (int i = 0; i < rq->wqe.info.num_frags; i++, wi++)
+                       wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+       }
 }
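
Flagging each fragment with MLX5E_WQE_FRAG_SKIP_RELEASE turns "already freed" into per-entry state, so a second dealloc pass over the same WQEs (RQ flush followed by RQ close) releases nothing twice. A minimal sketch of the flag-and-skip idiom (names hypothetical):

#include <assert.h>
#include <stdint.h>

#define FRAG_SKIP_RELEASE (1u << 0)

struct frag { uint32_t flags; int released; };

static void release_frags(struct frag *wi, int n)
{
        for (int i = 0; i < n; i++) {
                if (wi[i].flags & FRAG_SKIP_RELEASE)
                        continue;               /* second pass: skip */
                wi[i].released++;               /* "free" the page once */
                wi[i].flags |= FRAG_SKIP_RELEASE;
        }
}

int main(void)
{
        struct frag wi[4] = { { 0, 0 } };

        release_frags(wi, 4);   /* flush */
        release_frags(wi, 4);   /* close: no double release */
        for (int i = 0; i < 4; i++)
                assert(wi[i].released == 1);
        return 0;
}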
 
 static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
@@ -1743,11 +1751,11 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 
        prog = rcu_dereference(rq->xdp_prog);
        if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) {
-               if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
+               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
                        struct mlx5e_wqe_frag_info *pwi;
 
                        for (pwi = head_wi; pwi < wi; pwi++)
-                               pwi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+                               pwi->frag_page->frags++;
                }
                return NULL; /* page/packet was consumed by XDP */
        }
@@ -1817,12 +1825,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
                              rq, wi, cqe, cqe_bcnt);
        if (!skb) {
                /* probably for XDP */
-               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
-                       /* do not return page to cache,
-                        * it will be returned on XDP_TX completion.
-                        */
-                       wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
-               }
+               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+                       wi->frag_page->frags++;
                goto wq_cyc_pop;
        }
 
@@ -1868,12 +1872,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
                              rq, wi, cqe, cqe_bcnt);
        if (!skb) {
                /* probably for XDP */
-               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
-                       /* do not return page to cache,
-                        * it will be returned on XDP_TX completion.
-                        */
-                       wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
-               }
+               if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
+                       wi->frag_page->frags++;
                goto wq_cyc_pop;
        }
 
@@ -2052,12 +2052,12 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
        if (prog) {
                if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
                        if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
-                               int i;
+                               struct mlx5e_frag_page *pfp;
+
+                               for (pfp = head_page; pfp < frag_page; pfp++)
+                                       pfp->frags++;
 
-                               for (i = 0; i < sinfo->nr_frags; i++)
-                                       /* non-atomic */
-                                       __set_bit(page_idx + i, wi->skip_release_bitmap);
-                               return NULL;
+                               wi->linear_page.frags++;
                        }
                        mlx5e_page_release_fragmented(rq, &wi->linear_page);
                        return NULL; /* page/packet was consumed by XDP */
@@ -2155,7 +2155,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
                                 cqe_bcnt, &mxbuf);
                if (mlx5e_xdp_handle(rq, prog, &mxbuf)) {
                        if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
-                               __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
+                               frag_page->frags++;
                        return NULL; /* page/packet was consumed by XDP */
                }
 
index 41dc268..31708d5 100644 (file)
@@ -1639,7 +1639,8 @@ static void remove_unready_flow(struct mlx5e_tc_flow *flow)
        uplink_priv = &rpriv->uplink_priv;
 
        mutex_lock(&uplink_priv->unready_flows_lock);
-       unready_flow_del(flow);
+       if (flow_flag_test(flow, NOT_READY))
+               unready_flow_del(flow);
        mutex_unlock(&uplink_priv->unready_flows_lock);
 }
 
@@ -1724,6 +1725,19 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
        return 0;
 }
 
+static bool
+has_encap_dests(struct mlx5_flow_attr *attr)
+{
+       struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+       int out_index;
+
+       for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+               if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+                       return true;
+
+       return false;
+}
+
 static int
 post_process_attr(struct mlx5e_tc_flow *flow,
                  struct mlx5_flow_attr *attr,
@@ -1736,9 +1750,11 @@ post_process_attr(struct mlx5e_tc_flow *flow,
        if (err)
                goto err_out;
 
-       err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
-       if (err)
-               goto err_out;
+       if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) {
+               err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
+               if (err)
+                       goto err_out;
+       }
 
        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
@@ -1927,13 +1943,10 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 {
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_flow_attr *attr = flow->attr;
-       struct mlx5_esw_flow_attr *esw_attr;
 
-       esw_attr = attr->esw_attr;
        mlx5e_put_flow_tunnel_id(flow);
 
-       if (flow_flag_test(flow, NOT_READY))
-               remove_unready_flow(flow);
+       remove_unready_flow(flow);
 
        if (mlx5e_is_offloaded_flow(flow)) {
                if (flow_flag_test(flow, SLOW))
@@ -1951,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
 
-       if (esw_attr->int_port)
-               mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
-
-       if (esw_attr->dest_int_port)
-               mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
-
        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);
 
@@ -4253,6 +4260,7 @@ static void
 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
 {
        struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+       struct mlx5_esw_flow_attr *esw_attr;
 
        if (!attr)
                return;
@@ -4270,6 +4278,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a
                mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
        }
 
+       if (mlx5e_is_eswitch_flow(flow)) {
+               esw_attr = attr->esw_attr;
+
+               if (esw_attr->int_port)
+                       mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+                                             esw_attr->int_port);
+
+               if (esw_attr->dest_int_port)
+                       mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+                                             esw_attr->dest_int_port);
+       }
+
        mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
 
        free_branch_attr(flow, attr->branch_true);
index b6a45ef..dbd7cbe 100644 (file)
@@ -64,7 +64,7 @@ void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_
 
        bridge->debugfs_dir = debugfs_create_dir(br_netdev->name,
                                                 bridge->br_offloads->debugfs_root);
-       debugfs_create_file("fdb", 0444, bridge->debugfs_dir, bridge,
+       debugfs_create_file("fdb", 0400, bridge->debugfs_dir, bridge,
                            &mlx5_esw_bridge_debugfs_fops);
 }
 
index af779c7..fdf2be5 100644 (file)
@@ -60,7 +60,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
        }  else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) {
                memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
                dl_port->attrs.switch_id.id_len = ppid.id_len;
-               devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+               devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum,
                                              vport_num - 1, false);
        }
        return dl_port;
index faec7d7..243c455 100644 (file)
@@ -807,6 +807,9 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *
        hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
        vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce);
 
+       if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2))
+               goto out_free;
+
        memset(query_ctx, 0, query_out_sz);
        err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx,
                                            MLX5_CAP_GENERAL_2);
index bdfe609..e59380e 100644 (file)
@@ -1436,7 +1436,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
 
        esw_init_chains_offload_flags(esw, &attr.flags);
        attr.ns = MLX5_FLOW_NAMESPACE_FDB;
-       attr.fs_base_prio = FDB_TC_OFFLOAD;
        attr.max_grp_num = esw->params.large_group_num;
        attr.default_ft = miss_fdb;
        attr.mapping = esw->offloads.reg_c0_obj_pool;
@@ -2779,9 +2778,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
                                         struct mlx5_eswitch *peer_esw,
                                         bool pair)
 {
-       u8 peer_idx = mlx5_get_dev_index(peer_esw->dev);
+       u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id);
+       u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
        struct mlx5_flow_root_namespace *peer_ns;
-       u8 idx = mlx5_get_dev_index(esw->dev);
        struct mlx5_flow_root_namespace *ns;
        int err;
 
@@ -2789,18 +2788,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
        ns = esw->dev->priv.steering->fdb_root_ns;
 
        if (pair) {
-               err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx);
+               err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id);
                if (err)
                        return err;
 
-               err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx);
+               err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id);
                if (err) {
-                       mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
+                       mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id);
                        return err;
                }
        } else {
-               mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
-               mlx5_flow_namespace_set_peer(peer_ns, NULL, idx);
+               mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id);
+               mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id);
        }
 
        return 0;
@@ -4196,7 +4195,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
        }
 
        hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
-       MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1);
+       MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable);
 
        err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
                                            MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
index 91dcb0d..244cfd4 100644 (file)
@@ -140,7 +140,7 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace
 
 static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
                                  struct mlx5_flow_root_namespace *peer_ns,
-                                 u8 peer_idx)
+                                 u16 peer_vhca_id)
 {
        return 0;
 }
@@ -245,12 +245,20 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
            mlx5_lag_is_shared_fdb(dev) &&
            mlx5_lag_is_master(dev)) {
                struct mlx5_core_dev *peer_dev;
-               int i;
+               int i, j;
 
                mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
                        err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
                                                          (!disconnect) ? ft->id : 0);
                        if (err && !disconnect) {
+                               mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) {
+                                       if (j < i)
+                                               mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1,
+                                                                           ns->root_ft->id);
+                                       else
+                                               break;
+                               }
+
                                MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
                                MLX5_SET(set_flow_table_root_in, in, table_id,
                                         ns->root_ft->id);
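
The inner j-loop added above is the standard partial-rollback pattern: if applying the new root to peer i fails, peers 0..i-1, which already succeeded, are restored to the previous root before the error propagates. The shape in isolation (hypothetical apply/restore callbacks):

#include <stdio.h>

static int apply(int peer, int fail_at)
{
        return peer == fail_at ? -1 : 0;
}

static void restore(int peer)
{
        printf("restored peer %d\n", peer);
}

/* Apply to all peers, or roll back the ones already done. */
static int apply_all(int npeers, int fail_at)
{
        for (int i = 0; i < npeers; i++) {
                if (apply(i, fail_at)) {
                        for (int j = 0; j < i; j++) /* only completed peers */
                                restore(j);
                        return -1;
                }
        }
        return 0;
}

int main(void)
{
        /* Fails at peer 2; peers 0 and 1 are rolled back. */
        return apply_all(4, 2) == -1 ? 0 : 1;
}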
index b6b9a5a..7790ae5 100644 (file)
@@ -94,7 +94,7 @@ struct mlx5_flow_cmds {
 
        int (*set_peer)(struct mlx5_flow_root_namespace *ns,
                        struct mlx5_flow_root_namespace *peer_ns,
-                       u8 peer_idx);
+                       u16 peer_vhca_id);
 
        int (*create_ns)(struct mlx5_flow_root_namespace *ns);
        int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
index 4ef04aa..6b069fa 100644 (file)
@@ -889,7 +889,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
        struct fs_node *iter = list_entry(start, struct fs_node, list);
        struct mlx5_flow_table *ft = NULL;
 
-       if (!root || root->type == FS_TYPE_PRIO_CHAINS)
+       if (!root)
                return NULL;
 
        list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -905,20 +905,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
        return ft;
 }
 
-/* If reverse is false then return the first flow table in next priority of
- * prio in the tree, else return the last flow table in the previous priority
- * of prio in the tree.
+static struct fs_node *find_prio_chains_parent(struct fs_node *parent,
+                                              struct fs_node **child)
+{
+       struct fs_node *node = NULL;
+
+       while (parent && parent->type != FS_TYPE_PRIO_CHAINS) {
+               node = parent;
+               parent = parent->parent;
+       }
+
+       if (child)
+               *child = node;
+
+       return parent;
+}
+
+/* If reverse is false then return the first flow table next to the passed node
+ * in the tree, else return the last flow table before the node in the tree.
+ * If skip is true, skip the flow tables in the same prio_chains prio.
  */
-static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
+static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse,
+                                              bool skip)
 {
+       struct fs_node *prio_chains_parent = NULL;
        struct mlx5_flow_table *ft = NULL;
        struct fs_node *curr_node;
        struct fs_node *parent;
 
-       parent = prio->node.parent;
-       curr_node = &prio->node;
+       if (skip)
+               prio_chains_parent = find_prio_chains_parent(node, NULL);
+       parent = node->parent;
+       curr_node = node;
        while (!ft && parent) {
-               ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+               if (parent != prio_chains_parent)
+                       ft = find_closest_ft_recursive(parent, &curr_node->list,
+                                                      reverse);
                curr_node = parent;
                parent = curr_node->parent;
        }
@@ -926,15 +948,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers
 }
 
 /* Assuming all the tree is locked by mutex chain lock */
-static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
 {
-       return find_closest_ft(prio, false);
+       return find_closest_ft(node, false, true);
 }
 
 /* Assuming all the tree is locked by mutex chain lock */
-static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
 {
-       return find_closest_ft(prio, true);
+       return find_closest_ft(node, true, true);
 }
 
 static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
@@ -946,7 +968,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
        next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
        fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
 
-       return find_next_chained_ft(prio);
+       return find_next_chained_ft(&prio->node);
 }
 
 static int connect_fts_in_prio(struct mlx5_core_dev *dev,
@@ -970,21 +992,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
        return 0;
 }
 
+static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node,
+                                                         struct fs_node *parent,
+                                                         struct fs_node **child,
+                                                         bool reverse)
+{
+       struct mlx5_flow_table *ft;
+
+       ft = find_closest_ft(node, reverse, false);
+
+       if (ft && parent == find_prio_chains_parent(&ft->node, child))
+               return ft;
+
+       return NULL;
+}
+
 /* Connect flow tables from previous priority of prio to ft */
 static int connect_prev_fts(struct mlx5_core_dev *dev,
                            struct mlx5_flow_table *ft,
                            struct fs_prio *prio)
 {
+       struct fs_node *prio_parent, *parent = NULL, *child, *node;
        struct mlx5_flow_table *prev_ft;
+       int err = 0;
+
+       prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+       /* return directly if not under the first sub ns of prio_chains prio */
+       if (prio_parent && !list_is_first(&child->list, &prio_parent->children))
+               return 0;
 
-       prev_ft = find_prev_chained_ft(prio);
-       if (prev_ft) {
+       prev_ft = find_prev_chained_ft(&prio->node);
+       while (prev_ft) {
                struct fs_prio *prev_prio;
 
                fs_get_obj(prev_prio, prev_ft->node.parent);
-               return connect_fts_in_prio(dev, prev_prio, ft);
+               err = connect_fts_in_prio(dev, prev_prio, ft);
+               if (err)
+                       break;
+
+               if (!parent) {
+                       parent = find_prio_chains_parent(&prev_prio->node, &child);
+                       if (!parent)
+                               break;
+               }
+
+               node = child;
+               prev_ft = find_closet_ft_prio_chains(node, parent, &child, true);
        }
-       return 0;
+       return err;
 }
 
 static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
@@ -1123,7 +1179,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
                if (err)
                        return err;
 
-               next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
+               next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node);
                err = connect_fwd_rules(dev, ft, next_ft);
                if (err)
                        return err;
@@ -1198,7 +1254,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 
        tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
        next_ft = unmanaged ? ft_attr->next_ft :
-                             find_next_chained_ft(fs_prio);
+                             find_next_chained_ft(&fs_prio->node);
        ft->def_miss_action = ns->def_miss_action;
        ft->ns = ns;
        err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
@@ -2195,13 +2251,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules);
 /* Assuming prio->node.children(flow tables) is sorted by level */
 static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
 {
+       struct fs_node *prio_parent, *child;
        struct fs_prio *prio;
 
        fs_get_obj(prio, ft->node.parent);
 
        if (!list_is_last(&ft->node.list, &prio->node.children))
                return list_next_entry(ft, node.list);
-       return find_next_chained_ft(prio);
+
+       prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+       if (prio_parent && list_is_first(&child->list, &prio_parent->children))
+               return find_closest_ft(&prio->node, false, false);
+
+       return find_next_chained_ft(&prio->node);
 }
 
 static int update_root_ft_destroy(struct mlx5_flow_table *ft)
@@ -3621,7 +3684,7 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
 
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
                                 struct mlx5_flow_root_namespace *peer_ns,
-                                u8 peer_idx)
+                                u16 peer_vhca_id)
 {
        if (peer_ns && ns->mode != peer_ns->mode) {
                mlx5_core_err(ns->dev,
@@ -3629,7 +3692,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
                return -EINVAL;
        }
 
-       return ns->cmds->set_peer(ns, peer_ns, peer_idx);
+       return ns->cmds->set_peer(ns, peer_ns, peer_vhca_id);
 }
 
 /* This function should be called only at init stage of the namespace.
index 03e64c4..4aed176 100644 (file)
@@ -303,7 +303,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
 
 int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
                                 struct mlx5_flow_root_namespace *peer_ns,
-                                u8 peer_idx);
+                                u16 peer_vhca_id);
 
 int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
                                 enum mlx5_flow_steering_mode mode);
index d3a3fe4..7d9bbb4 100644 (file)
@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
        for (i = 0; i < ldev->ports; i++) {
                for (j = 0; j < ldev->buckets; j++) {
                        idx = i * ldev->buckets + j;
-                       if (ldev->v2p_map[i] == ports[i])
+                       if (ldev->v2p_map[idx] == ports[idx])
                                continue;
 
                        dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
index 973babf..377372f 100644 (file)
@@ -227,10 +227,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
        clock = container_of(timer, struct mlx5_clock, timer);
        mdev = container_of(clock, struct mlx5_core_dev, clock);
 
+       if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+               goto out;
+
        write_seqlock_irqsave(&clock->lock, flags);
        timecounter_read(&timer->tc);
        mlx5_update_clock_info_page(mdev);
        write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
        schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
 }
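
Note the control flow: when the device is in internal error the clock update is skipped, but the work still re-arms itself, so the overflow timer resumes on its own once the device recovers. A sketch of the skip-but-reschedule shape (plain C, the periodic re-arm reduced to a return value):

#include <stdbool.h>
#include <stdio.h>

enum dev_state { DEV_OK, DEV_INTERNAL_ERROR };

static int updates;

/* One tick of the periodic work: maybe skip the body, always re-arm. */
static bool overflow_work(enum dev_state state)
{
        if (state == DEV_INTERNAL_ERROR)
                goto out;      /* don't touch the device ... */

        updates++;             /* ... only count a real update ... */
out:
        return true;           /* ... but always reschedule */
}

int main(void)
{
        overflow_work(DEV_INTERNAL_ERROR); /* skipped, still re-armed */
        overflow_work(DEV_OK);             /* runs normally */
        printf("updates=%d\n", updates);   /* prints updates=1 */
        return 0;
}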
 
index db9df97..a80ecb6 100644 (file)
@@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
        if (!mlx5_chains_ignore_flow_level_supported(chains) ||
            (chain == 0 && prio == 1 && level == 0)) {
                ft_attr.level = chains->fs_base_level;
-               ft_attr.prio = chains->fs_base_prio;
+               ft_attr.prio = chains->fs_base_prio + prio - 1;
                ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
                        mlx5_get_fdb_sub_ns(chains->dev, chain) :
                        mlx5_get_flow_namespace(chains->dev, chains->ns);
index 88dbea6..72ae560 100644 (file)
@@ -1506,6 +1506,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
        if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
                mlx5_core_warn(dev, "%s: interface is down, NOP\n",
                               __func__);
+               mlx5_devlink_params_unregister(priv_to_devlink(dev));
                mlx5_cleanup_once(dev);
                goto out;
        }
@@ -1988,7 +1989,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
 
        mlx5_enter_error_state(dev, false);
        mlx5_error_sw_reset(dev);
-       mlx5_unload_one(dev, true);
+       mlx5_unload_one(dev, false);
        mlx5_drain_health_wq(dev);
        mlx5_pci_disable_device(dev);
 
index c4be257..682d3dc 100644 (file)
@@ -361,7 +361,7 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16
 
 static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func)
 {
-       return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev)
+       return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1
                          : vport;
 }
 
index 4e42a3b..a2fc937 100644 (file)
@@ -285,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
                host_total_vfs = MLX5_GET(query_esw_functions_out, out,
                                          host_params_context.host_total_vfs);
                kvfree(out);
-               if (host_total_vfs)
-                       return host_total_vfs;
+               return host_total_vfs;
        }
 
 done:
index e739ec6..54bb086 100644 (file)
@@ -2079,7 +2079,7 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
 
        peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) &&
                (vhca_id != dmn->info.caps.gvmi);
-       vport_dmn = peer_vport ? dmn->peer_dmn[vhca_id] : dmn;
+       vport_dmn = peer_vport ? xa_load(&dmn->peer_dmn_xa, vhca_id) : dmn;
        if (!vport_dmn) {
                mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
                return NULL;
index 7491911..8c2a34a 100644 (file)
@@ -564,11 +564,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
 
        err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
        if (err)
-               return err;
+               goto err_free_in;
 
        *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
-       kvfree(in);
 
+err_free_in:
+       kvfree(in);
        return err;
 }
 
index 75dc85d..3d74109 100644 (file)
@@ -475,6 +475,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
        mutex_init(&dmn->info.rx.mutex);
        mutex_init(&dmn->info.tx.mutex);
        xa_init(&dmn->definers_xa);
+       xa_init(&dmn->peer_dmn_xa);
 
        if (dr_domain_caps_init(mdev, dmn)) {
                mlx5dr_err(dmn, "Failed init domain, no caps\n");
@@ -507,6 +508,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
 uninit_caps:
        dr_domain_caps_uninit(dmn);
 def_xa_destroy:
+       xa_destroy(&dmn->peer_dmn_xa);
        xa_destroy(&dmn->definers_xa);
        kfree(dmn);
        return NULL;
@@ -547,6 +549,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
        dr_domain_uninit_csum_recalc_fts(dmn);
        dr_domain_uninit_resources(dmn);
        dr_domain_caps_uninit(dmn);
+       xa_destroy(&dmn->peer_dmn_xa);
        xa_destroy(&dmn->definers_xa);
        mutex_destroy(&dmn->info.tx.mutex);
        mutex_destroy(&dmn->info.rx.mutex);
@@ -556,17 +559,21 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
 
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
                            struct mlx5dr_domain *peer_dmn,
-                           u8 peer_idx)
+                           u16 peer_vhca_id)
 {
+       struct mlx5dr_domain *peer;
+
        mlx5dr_domain_lock(dmn);
 
-       if (dmn->peer_dmn[peer_idx])
-               refcount_dec(&dmn->peer_dmn[peer_idx]->refcount);
+       peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id);
+       if (peer)
+               refcount_dec(&peer->refcount);
 
-       dmn->peer_dmn[peer_idx] = peer_dmn;
+       WARN_ON(xa_err(xa_store(&dmn->peer_dmn_xa, peer_vhca_id, peer_dmn, GFP_KERNEL)));
 
-       if (dmn->peer_dmn[peer_idx])
-               refcount_inc(&dmn->peer_dmn[peer_idx]->refcount);
+       peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id);
+       if (peer)
+               refcount_inc(&peer->refcount);
 
        mlx5dr_domain_unlock(dmn);
 }
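
Keying peers by vhca_id in an xarray instead of indexing a fixed peer_dmn[MLX5_MAX_PORTS] array removes the assumption that vhca_id fits in the port range. In userspace terms this is a sparse integer-keyed map; a tiny sketch with a linear scan standing in for xa_load()/xa_store():

#include <assert.h>
#include <stddef.h>

struct entry { unsigned long key; void *val; };
static struct entry map[8]; /* toy stand-in for an xarray */

static void *map_load(unsigned long key)
{
        for (size_t i = 0; i < 8; i++)
                if (map[i].val && map[i].key == key)
                        return map[i].val;
        return NULL;
}

static void map_store(unsigned long key, void *val)
{
        for (size_t i = 0; i < 8; i++)
                if (!map[i].val || map[i].key == key) {
                        map[i].key = key;
                        map[i].val = val;
                        return;
                }
}

int main(void)
{
        int peer;

        map_store(0x9123, &peer);           /* sparse key, e.g. a vhca_id */
        assert(map_load(0x9123) == &peer);
        assert(map_load(3) == NULL);        /* no small-array bound */
        return 0;
}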
index d6947fe..8ca534e 100644 (file)
@@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
        u32 chunk_size;
        u32 index;
 
-       chunk_size = ilog2(num_of_actions);
+       chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
        /* HW modify action index granularity is at least 64B */
        chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
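
ilog2() alone truncates for sizes that are not powers of two (ilog2(5) == 2, i.e. only 4 slots), which would under-allocate the chunk; rounding up first yields the smallest order that actually fits num_of_actions. The arithmetic, checked in isolation:

#include <assert.h>
#include <stdint.h>

static uint32_t roundup_p2(uint32_t v)
{
        uint32_t p = 1;

        while (p < v)
                p <<= 1;
        return p;
}

static uint32_t ilog2_32(uint32_t v)
{
        uint32_t l = 0;

        while (v >>= 1)
                l++;
        return l;
}

int main(void)
{
        assert(ilog2_32(5) == 2);               /* 4 slots: too few */
        assert(ilog2_32(roundup_p2(5)) == 3);   /* 8 slots: fits */
        return 0;
}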
 
index 69d7a8f..f708b02 100644 (file)
@@ -1652,17 +1652,18 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
        struct mlx5dr_domain *dmn = sb->dmn;
        struct mlx5dr_domain *vport_dmn;
        u8 *bit_mask = sb->bit_mask;
+       struct mlx5dr_domain *peer;
        bool source_gvmi_set;
 
        DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
 
        if (sb->vhca_id_valid) {
+               peer = xa_load(&dmn->peer_dmn_xa, id);
                /* Find port GVMI based on the eswitch_owner_vhca_id */
                if (id == dmn->info.caps.gvmi)
                        vport_dmn = dmn;
-               else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
-                        (id == dmn->peer_dmn[id]->info.caps.gvmi))
-                       vport_dmn = dmn->peer_dmn[id];
+               else if (peer && (id == peer->info.caps.gvmi))
+                       vport_dmn = peer;
                else
                        return -EINVAL;
 
index f4ef0b2..dd856cd 100644 (file)
@@ -1984,16 +1984,17 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
        struct mlx5dr_domain *dmn = sb->dmn;
        struct mlx5dr_domain *vport_dmn;
        u8 *bit_mask = sb->bit_mask;
+       struct mlx5dr_domain *peer;
 
        DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn);
 
        if (sb->vhca_id_valid) {
+               peer = xa_load(&dmn->peer_dmn_xa, id);
                /* Find port GVMI based on the eswitch_owner_vhca_id */
                if (id == dmn->info.caps.gvmi)
                        vport_dmn = dmn;
-               else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
-                        (id == dmn->peer_dmn[id]->info.caps.gvmi))
-                       vport_dmn = dmn->peer_dmn[id];
+               else if (peer && (id == peer->info.caps.gvmi))
+                       vport_dmn = peer;
                else
                        return -EINVAL;
 
index 1622dbb..6c59de3 100644 (file)
@@ -935,7 +935,6 @@ struct mlx5dr_domain_info {
 };
 
 struct mlx5dr_domain {
-       struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS];
        struct mlx5_core_dev *mdev;
        u32 pdn;
        struct mlx5_uars_page *uar;
@@ -956,6 +955,7 @@ struct mlx5dr_domain {
        struct list_head dbg_tbl_list;
        struct mlx5dr_dbg_dump_info dump_info;
        struct xarray definers_xa;
+       struct xarray peer_dmn_xa;
        /* memory management statistics */
        u32 num_buddies[DR_ICM_TYPE_MAX];
 };
index 6aac5f0..feb307f 100644 (file)
@@ -781,14 +781,14 @@ restore_fte:
 
 static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
                                struct mlx5_flow_root_namespace *peer_ns,
-                               u8 peer_idx)
+                               u16 peer_vhca_id)
 {
        struct mlx5dr_domain *peer_domain = NULL;
 
        if (peer_ns)
                peer_domain = peer_ns->fs_dr_domain.dr_domain;
        mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
-                              peer_domain, peer_idx);
+                              peer_domain, peer_vhca_id);
        return 0;
 }
 
index 24cbb33..89fced8 100644 (file)
@@ -49,7 +49,7 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
 
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
                            struct mlx5dr_domain *peer_dmn,
-                           u8 peer_idx);
+                           u16 peer_vhca_id);
 
 struct mlx5dr_table *
 mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
index 20bb5eb..52199d3 100644 (file)
@@ -68,14 +68,19 @@ static struct thermal_zone_device_ops mlx5_thermal_ops = {
 
 int mlx5_thermal_init(struct mlx5_core_dev *mdev)
 {
+       char data[THERMAL_NAME_LENGTH];
        struct mlx5_thermal *thermal;
-       struct thermal_zone_device *tzd;
-       const char *data = "mlx5";
+       int err;
 
-       tzd = thermal_zone_get_zone_by_name(data);
-       if (!IS_ERR(tzd))
+       if (!mlx5_core_is_pf(mdev) && !mlx5_core_is_ecpf(mdev))
                return 0;
 
+       err = snprintf(data, sizeof(data), "mlx5_%s", dev_name(mdev->device));
+       if (err < 0 || err >= sizeof(data)) {
+               mlx5_core_err(mdev, "Failed to setup thermal zone name, %d\n", err);
+               return -EINVAL;
+       }
+
        thermal = kzalloc(sizeof(*thermal), GFP_KERNEL);
        if (!thermal)
                return -ENOMEM;
@@ -89,10 +94,10 @@ int mlx5_thermal_init(struct mlx5_core_dev *mdev)
                                                                 &mlx5_thermal_ops,
                                                                 NULL, 0, MLX5_THERMAL_POLL_INT_MSEC);
        if (IS_ERR(thermal->tzdev)) {
-               dev_err(mdev->device, "Failed to register thermal zone device (%s) %ld\n",
-                       data, PTR_ERR(thermal->tzdev));
+               err = PTR_ERR(thermal->tzdev);
+               mlx5_core_err(mdev, "Failed to register thermal zone device (%s) %d\n", data, err);
                kfree(thermal);
-               return -EINVAL;
+               return err;
        }
 
        mdev->thermal = thermal;
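
snprintf() returns the length the full string would have needed, so the err < 0 || err >= sizeof(data) test catches both encoding errors and truncation of the zone name instead of registering a silently clipped one. The check in isolation (NAME_LEN is a stand-in for THERMAL_NAME_LENGTH):

#include <stdio.h>

#define NAME_LEN 20

static int build_name(char *buf, size_t len, const char *dev)
{
        int n = snprintf(buf, len, "mlx5_%s", dev);

        if (n < 0 || (size_t)n >= len)
                return -1; /* error or truncated: refuse the name */
        return 0;
}

int main(void)
{
        char name[NAME_LEN];

        if (build_name(name, sizeof(name), "0000:03:00.0"))
                return 1;
        puts(name); /* mlx5_0000:03:00.0 */
        return 0;
}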
index f0b2963..973de2a 100644 (file)
@@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
        MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
        MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
-       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3),
-       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8),
+       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4),
+       MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
        MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
index c968309..51eea1f 100644 (file)
@@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci,
                                    struct sk_buff *skb,
                                    enum mlxsw_pci_cqe_v cqe_v, char *cqe)
 {
+       u8 ts_type;
+
        if (cqe_v != MLXSW_PCI_CQE_V2)
                return;
 
-       if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) !=
-           MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC)
+       ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe);
+
+       if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC &&
+           ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC)
                return;
 
        mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe);
index 8165bf3..17160e8 100644 (file)
@@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1);
  */
 MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12);
 
-/* reg_sspr_sub_port
- * Virtual port within the physical port.
- * Should be set to 0 when virtual ports are not enabled on the port.
- *
- * Access: RW
- */
-MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8);
-
 /* reg_sspr_system_port
  * Unique identifier within the stacking domain that represents all the ports
  * that are available in the system (external ports).
@@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port)
        MLXSW_REG_ZERO(sspr, payload);
        mlxsw_reg_sspr_m_set(payload, 1);
        mlxsw_reg_sspr_local_port_set(payload, local_port);
-       mlxsw_reg_sspr_sub_port_set(payload, 0);
        mlxsw_reg_sspr_system_port_set(payload, local_port);
 }
 
index e4f4cde..b1178b7 100644 (file)
@@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule,
                                       key->vrid, GENMASK(7, 0));
        mlxsw_sp_acl_rulei_keymask_u32(rulei,
                                       MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
-                                      key->vrid >> 8, GENMASK(2, 0));
+                                      key->vrid >> 8, GENMASK(3, 0));
        switch (key->proto) {
        case MLXSW_SP_L3_PROTO_IPV4:
                return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key);
index 4dea39f..ae2d6f1 100644 (file)
@@ -171,7 +171,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = {
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = {
        MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8),
-       MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3),
+       MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
@@ -321,7 +321,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = {
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = {
        MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8),
-       MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true),
+       MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4),
 };
 
 static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = {
index 24c994b..329e374 100644 (file)
@@ -46,7 +46,7 @@ config LAN743X
        tristate "LAN743x support"
        depends on PCI
        depends on PTP_1588_CLOCK_OPTIONAL
-       select PHYLIB
+       select FIXED_PHY
        select CRC16
        select CRC32
        help
index a499e46..c2ad092 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/ethtool.h>
 #include <linux/filter.h>
 #include <linux/mm.h>
+#include <linux/pci.h>
 
 #include <net/checksum.h>
 #include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
 static int mana_dealloc_queues(struct net_device *ndev)
 {
        struct mana_port_context *apc = netdev_priv(ndev);
+       unsigned long timeout = jiffies + 120 * HZ;
        struct gdma_dev *gd = apc->ac->gdma_dev;
        struct mana_txq *txq;
+       struct sk_buff *skb;
        int i, err;
+       u32 tsleep;
 
        if (apc->port_is_up)
                return -EINVAL;
@@ -2363,15 +2367,40 @@ static int mana_dealloc_queues(struct net_device *ndev)
         * to false, but it doesn't matter since mana_start_xmit() drops any
         * new packets due to apc->port_is_up being false.
         *
-        * Drain all the in-flight TX packets
+        * Drain all the in-flight TX packets.
+        * A single timeout of 120 seconds is shared by all the queues;
+        * it breaks the while loop when the h/w is not responding. The
+        * value of 120 was chosen considering the maximum number of
+        * queues.
         */
+
        for (i = 0; i < apc->num_queues; i++) {
                txq = &apc->tx_qp[i].txq;
-
-               while (atomic_read(&txq->pending_sends) > 0)
-                       usleep_range(1000, 2000);
+               tsleep = 1000;
+               while (atomic_read(&txq->pending_sends) > 0 &&
+                      time_before(jiffies, timeout)) {
+                       usleep_range(tsleep, tsleep + 1000);
+                       tsleep <<= 1;
+               }
+               if (atomic_read(&txq->pending_sends)) {
+                       err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+                       if (err) {
+                               netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+                                          err, atomic_read(&txq->pending_sends),
+                                          txq->gdma_txq_id);
+                       }
+                       break;
+               }
        }
 
+       for (i = 0; i < apc->num_queues; i++) {
+               txq = &apc->tx_qp[i].txq;
+               while ((skb = skb_dequeue(&txq->pending_skbs))) {
+                       mana_unmap_skb(skb, apc);
+                       dev_kfree_skb_any(skb);
+               }
+               atomic_set(&txq->pending_sends, 0);
+       }
        /* We're 100% sure the queues can no longer be woken up, because
         * we're sure now mana_poll_tx_cq() can't be running.
         */
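
The drain loop doubles its sleep each iteration (1 ms, 2 ms, 4 ms, ...) under one 120-second deadline shared by all queues, so an unresponsive device cannot stall teardown forever while a healthy one is polled cheaply. A userspace sketch of bounded exponential backoff (nanosleep standing in for usleep_range):

#include <stdbool.h>
#include <time.h>

/* Poll cond() with doubling sleeps until it holds or the deadline passes. */
static bool drain(bool (*cond)(void), double deadline_sec)
{
        struct timespec start, now;
        long sleep_us = 1000; /* start at 1 ms, like tsleep above */

        clock_gettime(CLOCK_MONOTONIC, &start);
        while (!cond()) {
                clock_gettime(CLOCK_MONOTONIC, &now);
                if ((now.tv_sec - start.tv_sec) +
                    (now.tv_nsec - start.tv_nsec) / 1e9 > deadline_sec)
                        return false; /* h/w not responding: give up */

                struct timespec ts = { sleep_us / 1000000,
                                       (sleep_us % 1000000) * 1000 };
                nanosleep(&ts, NULL);
                sleep_us <<= 1;       /* exponential backoff */
        }
        return true;
}

static bool already_drained(void) { return true; }

int main(void)
{
        return drain(already_drained, 120.0) ? 0 : 1;
}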
index 2fa833d..56ccbd4 100644 (file)
@@ -2927,7 +2927,6 @@ int ocelot_init(struct ocelot *ocelot)
 
        mutex_init(&ocelot->mact_lock);
        mutex_init(&ocelot->fwd_domain_lock);
-       mutex_init(&ocelot->tas_lock);
        spin_lock_init(&ocelot->ptp_clock_lock);
        spin_lock_init(&ocelot->ts_id_lock);
 
index 8e3894c..83a3ce0 100644 (file)
@@ -368,7 +368,8 @@ static bool ocelot_fdma_receive_skb(struct ocelot *ocelot, struct sk_buff *skb)
        if (unlikely(!ndev))
                return false;
 
-       pskb_trim(skb, skb->len - ETH_FCS_LEN);
+       if (pskb_trim(skb, skb->len - ETH_FCS_LEN))
+               return false;
 
        skb->dev = ndev;
        skb->protocol = eth_type_trans(skb, skb->dev);
index fb31451..c815ae6 100644 (file)
@@ -67,10 +67,13 @@ void ocelot_port_update_active_preemptible_tcs(struct ocelot *ocelot, int port)
                val = mm->preemptible_tcs;
 
        /* Cut through switching doesn't work for preemptible priorities,
-        * so first make sure it is disabled.
+        * so first make sure it is disabled. Also, changing the preemptible
+        * TCs affects the oversized frame dropping logic, so that needs to be
+        * re-triggered. And since tas_guard_bands_update() also implicitly
+        * calls cut_through_fwd(), we don't need to explicitly call it.
         */
        mm->active_preemptible_tcs = val;
-       ocelot->ops->cut_through_fwd(ocelot);
+       ocelot->ops->tas_guard_bands_update(ocelot, port);
 
        dev_dbg(ocelot->dev,
                "port %d %s/%s, MM TX %s, preemptible TCs 0x%x, active 0x%x\n",
@@ -89,17 +92,14 @@ void ocelot_port_change_fp(struct ocelot *ocelot, int port,
 {
        struct ocelot_mm_state *mm = &ocelot->mm[port];
 
-       mutex_lock(&ocelot->fwd_domain_lock);
+       lockdep_assert_held(&ocelot->fwd_domain_lock);
 
        if (mm->preemptible_tcs == preemptible_tcs)
-               goto out_unlock;
+               return;
 
        mm->preemptible_tcs = preemptible_tcs;
 
        ocelot_port_update_active_preemptible_tcs(ocelot, port);
-
-out_unlock:
-       mutex_unlock(&ocelot->fwd_domain_lock);
 }
 
 static void ocelot_mm_update_port_status(struct ocelot *ocelot, int port)
index b8678da..ab7d217 100644 (file)
@@ -353,12 +353,6 @@ err_out_reset:
        ionic_reset(ionic);
 err_out_teardown:
        ionic_dev_teardown(ionic);
-       pci_clear_master(pdev);
-       /* Don't fail the probe for these errors, keep
-        * the hw interface around for inspection
-        */
-       return 0;
-
 err_out_unmap_bars:
        ionic_unmap_bars(ionic);
 err_out_pci_release_regions:
index 7c20a44..432fb93 100644 (file)
@@ -475,11 +475,6 @@ static void ionic_qcqs_free(struct ionic_lif *lif)
 static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
                                      struct ionic_qcq *n_qcq)
 {
-       if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) {
-               ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index);
-               n_qcq->flags &= ~IONIC_QCQ_F_INTR;
-       }
-
        n_qcq->intr.vector = src_qcq->intr.vector;
        n_qcq->intr.index = src_qcq->intr.index;
        n_qcq->napi_qcq = src_qcq->napi_qcq;
@@ -1822,6 +1817,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
 static void ionic_tx_timeout_work(struct work_struct *ws)
 {
        struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+       int err;
 
        if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
                return;
@@ -1834,8 +1830,11 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
 
        mutex_lock(&lif->queue_lock);
        ionic_stop_queues_reconfig(lif);
-       ionic_start_queues_reconfig(lif);
+       err = ionic_start_queues_reconfig(lif);
        mutex_unlock(&lif->queue_lock);
+
+       if (err)
+               dev_err(lif->ionic->dev, "%s: Restarting queues failed\n", __func__);
 }
 
 static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
@@ -2805,17 +2804,22 @@ static int ionic_cmb_reconfig(struct ionic_lif *lif,
                        if (err) {
                                dev_err(lif->ionic->dev,
                                        "CMB restore failed: %d\n", err);
-                               goto errout;
+                               goto err_out;
                        }
                }
 
-               ionic_start_queues_reconfig(lif);
-       } else {
-               /* This was detached in ionic_stop_queues_reconfig() */
-               netif_device_attach(lif->netdev);
+               err = ionic_start_queues_reconfig(lif);
+               if (err) {
+                       dev_err(lif->ionic->dev,
+                               "CMB reconfig failed: %d\n", err);
+                       goto err_out;
+               }
        }
 
-errout:
+err_out:
+       /* This was detached in ionic_stop_queues_reconfig() */
+       netif_device_attach(lif->netdev);
+
        return err;
 }
 
index f868235..94d4f94 100644 (file)
@@ -194,6 +194,22 @@ void qed_hw_remove(struct qed_dev *cdev);
 struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
 
 /**
+ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
+ *                           atomicity.
+ *
+ * @p_hwfn: HW device data.
+ * @is_atomic: Hint from the caller - whether the function can sleep or not.
+ *
+ * Context: The function should not sleep when is_atomic == true.
+ * Return: struct qed_ptt.
+ *
+ * Should be called at the entry point to the driver
+ * (at the beginning of an exported function).
+ */
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
+                                       bool is_atomic);
+
+/**
  * qed_ptt_release(): Release PTT Window.
  *
  * @p_hwfn: HW device data.
index 3764190..04602ac 100644 (file)
@@ -693,13 +693,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
 }
 
 static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
-                             struct qed_fcoe_stats *p_stats)
+                             struct qed_fcoe_stats *p_stats,
+                             bool is_atomic)
 {
        struct qed_ptt *p_ptt;
 
        memset(p_stats, 0, sizeof(*p_stats));
 
-       p_ptt = qed_ptt_acquire(p_hwfn);
+       p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
 
        if (!p_ptt) {
                DP_ERR(p_hwfn, "Failed to acquire ptt\n");
@@ -973,19 +974,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
                                        QED_SPQ_MODE_EBLOCK, NULL);
 }
 
+static int qed_fcoe_stats_context(struct qed_dev *cdev,
+                                 struct qed_fcoe_stats *stats,
+                                 bool is_atomic)
+{
+       return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
+}
+
 static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
 {
-       return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
+       return qed_fcoe_stats_context(cdev, stats, false);
 }
 
 void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
-                                struct qed_mcp_fcoe_stats *stats)
+                                struct qed_mcp_fcoe_stats *stats,
+                                bool is_atomic)
 {
        struct qed_fcoe_stats proto_stats;
 
        /* Retrieve FW statistics */
        memset(&proto_stats, 0, sizeof(proto_stats));
-       if (qed_fcoe_stats(cdev, &proto_stats)) {
+       if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
                DP_VERBOSE(cdev, QED_MSG_STORAGE,
                           "Failed to collect FCoE statistics\n");
                return;
index 19c85ad..214e829 100644 (file)
@@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
 void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
 
 void qed_fcoe_free(struct qed_hwfn *p_hwfn);
+/**
+ * qed_get_protocol_stats_fcoe(): Fills provided statistics
+ *                               struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - whether the function can sleep or not.
+ *
+ * Context: The function should not sleep when is_atomic == true.
+ * Return: Void.
+ */
 void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
-                                struct qed_mcp_fcoe_stats *stats);
+                                struct qed_mcp_fcoe_stats *stats,
+                                bool is_atomic);
 #else /* CONFIG_QED_FCOE */
 static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
 {
@@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
 static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
 
 static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
-                                              struct qed_mcp_fcoe_stats *stats)
+                                              struct qed_mcp_fcoe_stats *stats,
+                                              bool is_atomic)
 {
 }
 #endif /* CONFIG_QED_FCOE */
index 554f30b..6263f84 100644 (file)
 #include "qed_reg_addr.h"
 #include "qed_sriov.h"
 
-#define QED_BAR_ACQUIRE_TIMEOUT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT     1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP         1000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT     100000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY         10
 
 /* Invalid values */
 #define QED_BAR_INVALID_OFFSET          (cpu_to_le32(-1))
@@ -85,11 +88,21 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
 
 struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
 {
+       return qed_ptt_acquire_context(p_hwfn, false);
+}
+
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
+{
        struct qed_ptt *p_ptt;
-       unsigned int i;
+       unsigned int i, count;
+
+       if (is_atomic)
+               count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
+       else
+               count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
 
        /* Take the free PTT from the list */
-       for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
+       for (i = 0; i < count; i++) {
                spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
 
                if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
@@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
                }
 
                spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
-               usleep_range(1000, 2000);
+
+               if (is_atomic)
+                       udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
+               else
+                       usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
+                                    QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
        }
 
        DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
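
The acquire path above picks its polling strategy from the caller's context: atomic callers spin with udelay() over many short steps, while sleeping callers make fewer passes with usleep_range(), and both fall through to the DP_NOTICE() timeout. A minimal userspace model of the same pattern, for illustration only; try_acquire() and the constants are stand-ins, not qed code:

#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define SLEEP_CNT 1000    /* fewer iterations, longer sleeping waits */
#define SLEEP_US  1000
#define BUSY_CNT  100000  /* many iterations, short busy waits */
#define BUSY_US   10

static bool try_acquire(void)
{
	static int n;

	return ++n > 5;   /* pretend the resource frees up eventually */
}

static bool acquire(bool is_atomic)
{
	unsigned int i, count = is_atomic ? BUSY_CNT : SLEEP_CNT;

	for (i = 0; i < count; i++) {
		if (try_acquire())
			return true;
		/* the kernel uses udelay() when atomic, usleep_range() otherwise */
		usleep(is_atomic ? BUSY_US : SLEEP_US);
	}
	return false;   /* timeout, mirrors the DP_NOTICE path */
}

int main(void)
{
	printf("acquired: %d\n", acquire(false));
	return 0;
}
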
index 511ab21..980e728 100644 (file)
@@ -999,13 +999,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
 }
 
 static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
-                              struct qed_iscsi_stats *stats)
+                              struct qed_iscsi_stats *stats,
+                              bool is_atomic)
 {
        struct qed_ptt *p_ptt;
 
        memset(stats, 0, sizeof(*stats));
 
-       p_ptt = qed_ptt_acquire(p_hwfn);
+       p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
        if (!p_ptt) {
                DP_ERR(p_hwfn, "Failed to acquire ptt\n");
                return -EAGAIN;
@@ -1336,9 +1337,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
                                           QED_SPQ_MODE_EBLOCK, NULL);
 }
 
+static int qed_iscsi_stats_context(struct qed_dev *cdev,
+                                  struct qed_iscsi_stats *stats,
+                                  bool is_atomic)
+{
+       return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
+}
+
 static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
 {
-       return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
+       return qed_iscsi_stats_context(cdev, stats, false);
 }
 
 static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1358,13 +1366,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
 }
 
 void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
-                                 struct qed_mcp_iscsi_stats *stats)
+                                 struct qed_mcp_iscsi_stats *stats,
+                                 bool is_atomic)
 {
        struct qed_iscsi_stats proto_stats;
 
        /* Retrieve FW statistics */
        memset(&proto_stats, 0, sizeof(proto_stats));
-       if (qed_iscsi_stats(cdev, &proto_stats)) {
+       if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
                DP_VERBOSE(cdev, QED_MSG_STORAGE,
                           "Failed to collect ISCSI statistics\n");
                return;
index dec2b00..974cb8d 100644 (file)
@@ -39,11 +39,14 @@ void qed_iscsi_free(struct qed_hwfn *p_hwfn);
  *
  * @cdev: Qed dev pointer.
  * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - whether the function may sleep.
  *
+ * Context: The function must not sleep when is_atomic == true.
  * Return: Void.
  */
 void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
-                                 struct qed_mcp_iscsi_stats *stats);
+                                 struct qed_mcp_iscsi_stats *stats,
+                                 bool is_atomic);
 #else /* IS_ENABLED(CONFIG_QED_ISCSI) */
 static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
 {
@@ -56,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
 
 static inline void
 qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
-                            struct qed_mcp_iscsi_stats *stats) {}
+                            struct qed_mcp_iscsi_stats *stats,
+                            bool is_atomic) {}
 #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
 
 #endif
index 7776d3b..970b9aa 100644 (file)
@@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
 }
 
 static void _qed_get_vport_stats(struct qed_dev *cdev,
-                                struct qed_eth_stats *stats)
+                                struct qed_eth_stats *stats,
+                                bool is_atomic)
 {
        u8 fw_vport = 0;
        int i;
@@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
 
        for_each_hwfn(cdev, i) {
                struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-               struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
-                                                   :  NULL;
+               struct qed_ptt *p_ptt;
                bool b_get_port_stats;
 
+               p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
+                                   : NULL;
                if (IS_PF(cdev)) {
                        /* The main vport index is relative first */
                        if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
@@ -1901,6 +1903,13 @@ out:
 
 void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
 {
+       qed_get_vport_stats_context(cdev, stats, false);
+}
+
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+                                struct qed_eth_stats *stats,
+                                bool is_atomic)
+{
        u32 i;
 
        if (!cdev || cdev->recov_in_prog) {
@@ -1908,7 +1917,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
                return;
        }
 
-       _qed_get_vport_stats(cdev, stats);
+       _qed_get_vport_stats(cdev, stats, is_atomic);
 
        if (!cdev->reset_stats)
                return;
@@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
        if (!cdev->reset_stats) {
                DP_INFO(cdev, "Reset stats not allocated\n");
        } else {
-               _qed_get_vport_stats(cdev, cdev->reset_stats);
+               _qed_get_vport_stats(cdev, cdev->reset_stats, false);
                cdev->reset_stats->common.link_change_count = 0;
        }
 }
index a538cf4..2d2f82c 100644 (file)
@@ -249,8 +249,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
                            enum spq_mode comp_mode,
                            struct qed_spq_comp_cb *p_comp_data);
 
+/**
+ * qed_get_vport_stats(): Fills the provided struct
+ *                       with vport statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ *
+ * Return: Void.
+ */
 void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
 
+/**
+ * qed_get_vport_stats_context(): Fills the provided struct
+ *                               with vport statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - whether the function may sleep.
+ *
+ * Context: The function must not sleep when is_atomic == true.
+ * Return: Void.
+ */
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+                                struct qed_eth_stats *stats,
+                                bool is_atomic);
+
 void qed_reset_vport_stats(struct qed_dev *cdev);
 
 /**
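
Taken together, the new *_context entry points let a caller that cannot sleep request statistics safely, while the old entry points remain thin wrappers passing is_atomic == false. A hypothetical caller sketch, kernel context assumed; the atomic path matches how the qed_mcp.c hunk below uses these helpers:

	struct qed_eth_stats stats;

	/* atomic path, e.g. from qed_get_protocol_stats() */
	qed_get_vport_stats_context(cdev, &stats, true);   /* must not sleep */

	/* process context keeps the historical wrapper */
	qed_get_vport_stats(cdev, &stats);                 /* is_atomic == false */
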
index f5af833..c278f88 100644 (file)
@@ -3092,7 +3092,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
 
        switch (type) {
        case QED_MCP_LAN_STATS:
-               qed_get_vport_stats(cdev, &eth_stats);
+               qed_get_vport_stats_context(cdev, &eth_stats, true);
                stats->lan_stats.ucast_rx_pkts =
                                        eth_stats.common.rx_ucast_pkts;
                stats->lan_stats.ucast_tx_pkts =
@@ -3100,10 +3100,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
                stats->lan_stats.fcs_err = -1;
                break;
        case QED_MCP_FCOE_STATS:
-               qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
+               qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
                break;
        case QED_MCP_ISCSI_STATS:
-               qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
+               qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
                break;
        default:
                DP_VERBOSE(cdev, QED_MSG_SP,
index 4b004a7..99df00c 100644 (file)
@@ -176,6 +176,15 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
 }
 #endif
 
+static int __maybe_unused qede_suspend(struct device *dev)
+{
+       dev_info(dev, "Device does not support suspend operation\n");
+
+       return -EOPNOTSUPP;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(qede_pm_ops, qede_suspend, NULL);
+
 static const struct pci_error_handlers qede_err_handler = {
        .error_detected = qede_io_error_detected,
 };
@@ -190,6 +199,7 @@ static struct pci_driver qede_pci_driver = {
        .sriov_configure = qede_sriov_configure,
 #endif
        .err_handler = &qede_err_handler,
+       .driver.pm = &qede_pm_ops,
 };
 
 static struct qed_eth_cb_ops qede_ll_ops = {
index 0d80447..d5c688a 100644 (file)
@@ -1260,8 +1260,11 @@ static int emac_tso_csum(struct emac_adapter *adpt,
                if (skb->protocol == htons(ETH_P_IP)) {
                        u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data)
                                       + ntohs(ip_hdr(skb)->tot_len);
-                       if (skb->len > pkt_len)
-                               pskb_trim(skb, pkt_len);
+                       if (skb->len > pkt_len) {
+                               ret = pskb_trim(skb, pkt_len);
+                               if (unlikely(ret))
+                                       return ret;
+                       }
                }
 
                hdr_len = skb_tcp_all_headers(skb);
index 9445f04..5eb50b2 100644 (file)
@@ -623,6 +623,7 @@ struct rtl8169_private {
        int cfg9346_usage_count;
 
        unsigned supports_gmii:1;
+       unsigned aspm_manageable:1;
        dma_addr_t counters_phys_addr;
        struct rtl8169_counters *counters;
        struct rtl8169_tc_offsets tc_offset;
@@ -2746,7 +2747,15 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
        if (tp->mac_version < RTL_GIGA_MAC_VER_32)
                return;
 
-       if (enable) {
+       /* Don't enable ASPM in the chip if the OS can't control ASPM */
+       if (enable && tp->aspm_manageable) {
+               /* On these chip versions ASPM can even harm
+                * bus communication of other PCI devices.
+                */
+               if (tp->mac_version == RTL_GIGA_MAC_VER_42 ||
+                   tp->mac_version == RTL_GIGA_MAC_VER_43)
+                       return;
+
                rtl_mod_config5(tp, 0, ASPM_en);
                rtl_mod_config2(tp, 0, ClkReqEn);
 
@@ -4514,10 +4523,6 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
        }
 
        if (napi_schedule_prep(&tp->napi)) {
-               rtl_unlock_config_regs(tp);
-               rtl_hw_aspm_clkreq_enable(tp, false);
-               rtl_lock_config_regs(tp);
-
                rtl_irq_disable(tp);
                __napi_schedule(&tp->napi);
        }
@@ -4577,14 +4582,9 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
 
        work_done = rtl_rx(dev, tp, budget);
 
-       if (work_done < budget && napi_complete_done(napi, work_done)) {
+       if (work_done < budget && napi_complete_done(napi, work_done))
                rtl_irq_enable(tp);
 
-               rtl_unlock_config_regs(tp);
-               rtl_hw_aspm_clkreq_enable(tp, true);
-               rtl_lock_config_regs(tp);
-       }
-
        return work_done;
 }
 
@@ -5158,6 +5158,16 @@ done:
        rtl_rar_set(tp, mac_addr);
 }
 
+/* The register is set if the system vendor successfully tested ASPM 1.2 */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+       if (tp->mac_version >= RTL_GIGA_MAC_VER_61 &&
+           r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+               return true;
+
+       return false;
+}
+
 static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        struct rtl8169_private *tp;
@@ -5227,6 +5237,19 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                                     xid);
        tp->mac_version = chipset;
 
+       /* Disable ASPM L1 as it causes random devices to stop working and
+        * full system hangs for some PCIe device users.
+        * Chips from RTL8168h partially have issues with L1.2, but seem
+        * to work fine with L1 and L1.1.
+        */
+       if (rtl_aspm_is_safe(tp))
+               rc = 0;
+       else if (tp->mac_version >= RTL_GIGA_MAC_VER_46)
+               rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
+       else
+               rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
+       tp->aspm_manageable = !rc;
+
        tp->dash_type = rtl_check_dash(tp);
 
        tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
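
The probe logic above forms a three-way ladder: leave ASPM alone when the system vendor has validated it, otherwise disable only L1.2 on newer chips, and disable L1 entirely on older ones. A toy standalone model of that decision; the version numbers and register value are stand-ins for illustration, not driver code:

#include <stdbool.h>
#include <stdio.h>

#define VER_46 46
#define VER_61 61

static bool aspm_is_safe(int mac_version, unsigned int ocp_c0b2)
{
	/* low nibble is set if the system vendor validated ASPM 1.2 */
	return mac_version >= VER_61 && (ocp_c0b2 & 0xf);
}

int main(void)
{
	int ver = 61;
	unsigned int reg = 0x3;

	if (aspm_is_safe(ver, reg))
		printf("leave ASPM enabled\n");
	else if (ver >= VER_46)
		printf("disable L1.2 only\n");
	else
		printf("disable L1 entirely\n");
	return 0;
}
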
index 7adde96..35d8e98 100644 (file)
@@ -1194,7 +1194,7 @@ int ef100_probe_netdev_pf(struct efx_nic *efx)
                net_dev->features |= NETIF_F_HW_TC;
                efx->fixed_features |= NETIF_F_HW_TC;
        }
-       return rc;
+       return 0;
 }
 
 int ef100_probe_vf(struct efx_nic *efx)
index 9e5ce2a..c3dc88e 100644 (file)
  */
 struct ef4_loopback_payload {
        char pad[2]; /* Ensures ip is 4-byte aligned */
-       struct ethhdr header;
-       struct iphdr ip;
-       struct udphdr udp;
-       __be16 iteration;
-       char msg[64];
+       struct_group_attr(packet, __packed,
+               struct ethhdr header;
+               struct iphdr ip;
+               struct udphdr udp;
+               __be16 iteration;
+               char msg[64];
+       );
 } __packed __aligned(4);
-#define EF4_LOOPBACK_PAYLOAD_LEN       (sizeof(struct ef4_loopback_payload) - \
-                                        offsetof(struct ef4_loopback_payload, \
-                                                 header))
+#define EF4_LOOPBACK_PAYLOAD_LEN       \
+               sizeof_field(struct ef4_loopback_payload, packet)
 
 /* Loopback test source MAC address */
 static const u8 payload_source[ETH_ALEN] __aligned(2) = {
@@ -299,7 +300,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx,
 
        payload = &state->payload;
 
-       memcpy(&received.header, buf_ptr,
+       memcpy(&received.packet, buf_ptr,
               min_t(int, pkt_len, EF4_LOOPBACK_PAYLOAD_LEN));
        received.ip.saddr = payload->ip.saddr;
        if (state->offload_csum)
@@ -370,7 +371,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx,
                               buf_ptr, pkt_len, 0);
                netif_err(efx, drv, efx->net_dev, "expected packet:\n");
                print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
-                              &state->payload.header, EF4_LOOPBACK_PAYLOAD_LEN,
+                              &state->payload.packet, EF4_LOOPBACK_PAYLOAD_LEN,
                               0);
        }
 #endif
@@ -427,7 +428,7 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
        for (i = 0; i < state->packet_count; i++) {
                /* Allocate an skb, holding an extra reference for
                 * transmit completion counting */
-               skb = alloc_skb(EF4_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
                state->skbs[i] = skb;
@@ -440,6 +441,8 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
                payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
                /* Strip off the leading padding */
                skb_pull(skb, offsetof(struct ef4_loopback_payload, header));
+               /* Strip off the trailing padding */
+               skb_trim(skb, EF4_LOOPBACK_PAYLOAD_LEN);
 
                /* Ensure everything we've written is visible to the
                 * interrupt handler. */
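
The struct_group_attr() conversion above gives the wire portion of the payload a name, so memcpy() and sizeof_field() can operate on exactly those bytes as one unit instead of a past-the-member copy. A compilable userspace demo of the idea; the macro here is a simplified re-definition for illustration (the kernel's version lives in include/linux/stddef.h):

#include <stdio.h>
#include <string.h>

#define struct_group(NAME, ...) \
	union { \
		struct { __VA_ARGS__ }; \
		struct { __VA_ARGS__ } NAME; \
	}

struct payload {
	char pad[2];                   /* not part of the wire data */
	struct_group(packet,
		char header[14];
		char msg[8];
	);
};

#define PAYLOAD_LEN sizeof(((struct payload *)0)->packet)

int main(void)
{
	struct payload src = { .msg = "hello" }, dst = { 0 };

	memcpy(&dst.packet, &src.packet, PAYLOAD_LEN); /* one bounded copy */
	printf("len=%zu msg=%s\n", PAYLOAD_LEN, dst.msg);
	return 0;
}
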
index 96d856b..563c1e3 100644 (file)
  */
 struct efx_loopback_payload {
        char pad[2]; /* Ensures ip is 4-byte aligned */
-       struct ethhdr header;
-       struct iphdr ip;
-       struct udphdr udp;
-       __be16 iteration;
-       char msg[64];
+       struct_group_attr(packet, __packed,
+               struct ethhdr header;
+               struct iphdr ip;
+               struct udphdr udp;
+               __be16 iteration;
+               char msg[64];
+       );
 } __packed __aligned(4);
-#define EFX_LOOPBACK_PAYLOAD_LEN       (sizeof(struct efx_loopback_payload) - \
-                                        offsetof(struct efx_loopback_payload, \
-                                                 header))
+#define EFX_LOOPBACK_PAYLOAD_LEN       \
+               sizeof_field(struct efx_loopback_payload, packet)
 
 /* Loopback test source MAC address */
 static const u8 payload_source[ETH_ALEN] __aligned(2) = {
@@ -297,7 +298,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx,
 
        payload = &state->payload;
 
-       memcpy(&received.header, buf_ptr,
+       memcpy(&received.packet, buf_ptr,
               min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN));
        received.ip.saddr = payload->ip.saddr;
        if (state->offload_csum)
@@ -368,7 +369,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx,
                               buf_ptr, pkt_len, 0);
                netif_err(efx, drv, efx->net_dev, "expected packet:\n");
                print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
-                              &state->payload.header, EFX_LOOPBACK_PAYLOAD_LEN,
+                              &state->payload.packet, EFX_LOOPBACK_PAYLOAD_LEN,
                               0);
        }
 #endif
@@ -425,7 +426,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
        for (i = 0; i < state->packet_count; i++) {
                /* Allocate an skb, holding an extra reference for
                 * transmit completion counting */
-               skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
                state->skbs[i] = skb;
@@ -438,6 +439,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
                payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
                /* Strip off the leading padding */
                skb_pull(skb, offsetof(struct efx_loopback_payload, header));
+               /* Strip off the trailing padding */
+               skb_trim(skb, EFX_LOOPBACK_PAYLOAD_LEN);
 
                /* Ensure everything we've written is visible to the
                 * interrupt handler. */
index 111ac17..526da43 100644 (file)
  */
 struct efx_loopback_payload {
        char pad[2]; /* Ensures ip is 4-byte aligned */
-       struct ethhdr header;
-       struct iphdr ip;
-       struct udphdr udp;
-       __be16 iteration;
-       char msg[64];
+       struct_group_attr(packet, __packed,
+               struct ethhdr header;
+               struct iphdr ip;
+               struct udphdr udp;
+               __be16 iteration;
+               char msg[64];
+       );
 } __packed __aligned(4);
-#define EFX_LOOPBACK_PAYLOAD_LEN       (sizeof(struct efx_loopback_payload) - \
-                                        offsetof(struct efx_loopback_payload, \
-                                                 header))
+#define EFX_LOOPBACK_PAYLOAD_LEN       \
+               sizeof_field(struct efx_loopback_payload, packet)
 
 /* Loopback test source MAC address */
 static const u8 payload_source[ETH_ALEN] __aligned(2) = {
@@ -297,7 +298,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx,
 
        payload = &state->payload;
 
-       memcpy(&received.header, buf_ptr,
+       memcpy(&received.packet, buf_ptr,
               min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN));
        received.ip.saddr = payload->ip.saddr;
        if (state->offload_csum)
@@ -368,7 +369,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx,
                               buf_ptr, pkt_len, 0);
                netif_err(efx, drv, efx->net_dev, "expected packet:\n");
                print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
-                              &state->payload.header, EFX_LOOPBACK_PAYLOAD_LEN,
+                              &state->payload.packet, EFX_LOOPBACK_PAYLOAD_LEN,
                               0);
        }
 #endif
@@ -425,7 +426,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
        for (i = 0; i < state->packet_count; i++) {
                /* Allocate an skb, holding an extra reference for
                 * transmit completion counting */
-               skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+               skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
                if (!skb)
                        return -ENOMEM;
                state->skbs[i] = skb;
@@ -438,6 +439,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
                payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
                /* Strip off the leading padding */
                skb_pull(skb, offsetof(struct efx_loopback_payload, header));
+               /* Strip off the trailing padding */
+               skb_trim(skb, EFX_LOOPBACK_PAYLOAD_LEN);
 
                /* Ensure everything we've written is visible to the
                 * interrupt handler. */
index 15ebd39..fe268b6 100644 (file)
@@ -1657,10 +1657,10 @@ int efx_init_tc(struct efx_nic *efx)
        rc = efx_tc_configure_fallback_acts_reps(efx);
        if (rc)
                return rc;
-       efx->tc->up = true;
        rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
        if (rc)
                return rc;
+       efx->tc->up = true;
        return 0;
 }
 
index 2d7347b..0dcd6a5 100644 (file)
@@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev,
                return err;
        }
 
+       /*
+        * SynQuacer is physically configured with TX and RX delays
+        * but the standard firmware claimed otherwise for a long
+        * time; ignore it.
+        */
+       if (of_machine_is_compatible("socionext,developer-box") &&
+           priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) {
+               dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n");
+               priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID;
+       }
+
        priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
        if (!priv->phy_np) {
                dev_err(&pdev->dev, "missing required property 'phy-handle'\n");
index f8367c5..fbb0ccf 100644 (file)
@@ -234,7 +234,8 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
        res.addr = mgbe->regs;
        res.irq = irq;
 
-       mgbe->clks = devm_kzalloc(&pdev->dev, sizeof(*mgbe->clks), GFP_KERNEL);
+       mgbe->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(mgbe_clks),
+                                 sizeof(*mgbe->clks), GFP_KERNEL);
        if (!mgbe->clks)
                return -ENOMEM;
 
index df41eac..03ceb6a 100644 (file)
@@ -240,13 +240,15 @@ void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
 void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable)
 {
        u32 value = readl(ioaddr + GMAC_CONFIG);
+       u32 old_val = value;
 
        if (enable)
                value |= GMAC_CONFIG_RE | GMAC_CONFIG_TE;
        else
                value &= ~(GMAC_CONFIG_TE | GMAC_CONFIG_RE);
 
-       writel(value, ioaddr + GMAC_CONFIG);
+       if (value != old_val)
+               writel(value, ioaddr + GMAC_CONFIG);
 }
 
 void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
index 0c5e783..64bf22c 100644 (file)
@@ -106,23 +106,37 @@ struct cpsw_ale_dev_id {
 
 static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
 {
-       int idx;
+       int idx, idx2;
+       u32 hi_val = 0;
 
        idx    = start / 32;
+       idx2 = (start + bits - 1) / 32;
+       /* Check if bits to be fetched exceed a word */
+       if (idx != idx2) {
+               idx2 = 2 - idx2; /* flip */
+               hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
+       }
        start -= idx * 32;
        idx    = 2 - idx; /* flip */
-       return (ale_entry[idx] >> start) & BITMASK(bits);
+       return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits);
 }
 
 static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
                                      u32 value)
 {
-       int idx;
+       int idx, idx2;
 
        value &= BITMASK(bits);
-       idx    = start / 32;
+       idx = start / 32;
+       idx2 = (start + bits - 1) / 32;
+       /* Check if bits to be set exceed a word */
+       if (idx != idx2) {
+               idx2 = 2 - idx2; /* flip */
+               ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
+               ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
+       }
        start -= idx * 32;
-       idx    = 2 - idx; /* flip */
+       idx = 2 - idx; /* flip */
        ale_entry[idx] &= ~(BITMASK(bits) << start);
        ale_entry[idx] |=  (value << start);
 }
index 39a9aee..6321178 100644 (file)
@@ -1511,7 +1511,6 @@ static void wx_configure_rx(struct wx *wx)
        psrtype = WX_RDB_PL_CFG_L4HDR |
                  WX_RDB_PL_CFG_L3HDR |
                  WX_RDB_PL_CFG_L2HDR |
-                 WX_RDB_PL_CFG_TUN_TUNHDR |
                  WX_RDB_PL_CFG_TUN_TUNHDR;
        wr32(wx, WX_RDB_PL_CFG(0), psrtype);
 
index 12405d7..0772eb1 100644 (file)
@@ -186,9 +186,6 @@ static int txgbe_calc_eeprom_checksum(struct wx *wx, u16 *checksum)
        if (eeprom_ptrs)
                kvfree(eeprom_ptrs);
 
-       if (*checksum > TXGBE_EEPROM_SUM)
-               return -EINVAL;
-
        *checksum = TXGBE_EEPROM_SUM - *checksum;
 
        return 0;
index e0ac1bc..49f3033 100644 (file)
@@ -1567,12 +1567,16 @@ static int temac_probe(struct platform_device *pdev)
        }
 
        /* Error handle returned DMA RX and TX interrupts */
-       if (lp->rx_irq < 0)
-               return dev_err_probe(&pdev->dev, lp->rx_irq,
+       if (lp->rx_irq <= 0) {
+               rc = lp->rx_irq ?: -EINVAL;
+               return dev_err_probe(&pdev->dev, rc,
                                     "could not get DMA RX irq\n");
-       if (lp->tx_irq < 0)
-               return dev_err_probe(&pdev->dev, lp->tx_irq,
+       }
+       if (lp->tx_irq <= 0) {
+               rc = lp->tx_irq ?: -EINVAL;
+               return dev_err_probe(&pdev->dev, rc,
                                     "could not get DMA TX irq\n");
+       }
 
        if (temac_np) {
                /* Retrieve the MAC address */
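
The temac change also covers irq == 0, which is not a valid Linux IRQ number, using GNU C's a ?: b ("elvis") form: equivalent to a ? a : b with a evaluated once. A tiny standalone demo (builds with gcc or clang):

#include <stdio.h>

int main(void)
{
	int irq = 0;           /* an IRQ lookup may hand back 0 */
	int rc = irq ?: -22;   /* falls back to -22 (-EINVAL on Linux) */

	printf("rc=%d\n", rc);
	return 0;
}
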
index f0529c3..7b637bb 100644 (file)
@@ -273,16 +273,15 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
        if (ret)
                return ret;
 
-       ret = ipa_filter_reset_table(ipa, true, false, modem);
-       if (ret)
+       ret = ipa_filter_reset_table(ipa, false, true, modem);
+       if (ret || !ipa_table_hash_support(ipa))
                return ret;
 
-       ret = ipa_filter_reset_table(ipa, false, true, modem);
+       ret = ipa_filter_reset_table(ipa, true, false, modem);
        if (ret)
                return ret;
-       ret = ipa_filter_reset_table(ipa, true, true, modem);
 
-       return ret;
+       return ipa_filter_reset_table(ipa, true, true, modem);
 }
 
 /* The AP routes and modem routes are each contiguous within the
@@ -291,12 +290,13 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
  * */
 static int ipa_route_reset(struct ipa *ipa, bool modem)
 {
+       bool hash_support = ipa_table_hash_support(ipa);
        u32 modem_route_count = ipa->modem_route_count;
        struct gsi_trans *trans;
        u16 first;
        u16 count;
 
-       trans = ipa_cmd_trans_alloc(ipa, 4);
+       trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2);
        if (!trans) {
                dev_err(&ipa->pdev->dev,
                        "no transaction for %s route reset\n",
@@ -313,10 +313,12 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
        }
 
        ipa_table_reset_add(trans, false, false, false, first, count);
-       ipa_table_reset_add(trans, false, true, false, first, count);
-
        ipa_table_reset_add(trans, false, false, true, first, count);
-       ipa_table_reset_add(trans, false, true, true, first, count);
+
+       if (hash_support) {
+               ipa_table_reset_add(trans, false, true, false, first, count);
+               ipa_table_reset_add(trans, false, true, true, first, count);
+       }
 
        gsi_trans_commit_wait(trans);
 
index b15dd9a..1b55928 100644 (file)
@@ -748,7 +748,8 @@ static int ipvlan_device_event(struct notifier_block *unused,
 
                write_pnet(&port->pnet, newnet);
 
-               ipvlan_migrate_l3s_hook(oldnet, newnet);
+               if (port->mode == IPVLAN_MODE_L3S)
+                       ipvlan_migrate_l3s_hook(oldnet, newnet);
                break;
        }
        case NETDEV_UNREGISTER:
index 984dfa5..144ec75 100644 (file)
@@ -743,7 +743,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
                u64_stats_update_begin(&rxsc_stats->syncp);
                rxsc_stats->stats.InPktsLate++;
                u64_stats_update_end(&rxsc_stats->syncp);
-               secy->netdev->stats.rx_dropped++;
+               DEV_STATS_INC(secy->netdev, rx_dropped);
                return false;
        }
 
@@ -767,7 +767,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
                        rxsc_stats->stats.InPktsNotValid++;
                        u64_stats_update_end(&rxsc_stats->syncp);
                        this_cpu_inc(rx_sa->stats->InPktsNotValid);
-                       secy->netdev->stats.rx_errors++;
+                       DEV_STATS_INC(secy->netdev, rx_errors);
                        return false;
                }
 
@@ -1069,7 +1069,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
                        u64_stats_update_begin(&secy_stats->syncp);
                        secy_stats->stats.InPktsNoTag++;
                        u64_stats_update_end(&secy_stats->syncp);
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                        continue;
                }
 
@@ -1179,7 +1179,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                u64_stats_update_begin(&secy_stats->syncp);
                secy_stats->stats.InPktsBadTag++;
                u64_stats_update_end(&secy_stats->syncp);
-               secy->netdev->stats.rx_errors++;
+               DEV_STATS_INC(secy->netdev, rx_errors);
                goto drop_nosa;
        }
 
@@ -1196,7 +1196,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                        u64_stats_update_begin(&rxsc_stats->syncp);
                        rxsc_stats->stats.InPktsNotUsingSA++;
                        u64_stats_update_end(&rxsc_stats->syncp);
-                       secy->netdev->stats.rx_errors++;
+                       DEV_STATS_INC(secy->netdev, rx_errors);
                        if (active_rx_sa)
                                this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA);
                        goto drop_nosa;
@@ -1230,7 +1230,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
                        u64_stats_update_begin(&rxsc_stats->syncp);
                        rxsc_stats->stats.InPktsLate++;
                        u64_stats_update_end(&rxsc_stats->syncp);
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                        goto drop;
                }
        }
@@ -1271,7 +1271,7 @@ deliver:
        if (ret == NET_RX_SUCCESS)
                count_rx(dev, len);
        else
-               macsec->secy.netdev->stats.rx_dropped++;
+               DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
 
        rcu_read_unlock();
 
@@ -1308,7 +1308,7 @@ nosci:
                        u64_stats_update_begin(&secy_stats->syncp);
                        secy_stats->stats.InPktsNoSCI++;
                        u64_stats_update_end(&secy_stats->syncp);
-                       macsec->secy.netdev->stats.rx_errors++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_errors);
                        continue;
                }
 
@@ -1327,7 +1327,7 @@ nosci:
                        secy_stats->stats.InPktsUnknownSCI++;
                        u64_stats_update_end(&secy_stats->syncp);
                } else {
-                       macsec->secy.netdev->stats.rx_dropped++;
+                       DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
                }
        }
 
@@ -3422,7 +3422,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
 
        if (!secy->operational) {
                kfree_skb(skb);
-               dev->stats.tx_dropped++;
+               DEV_STATS_INC(dev, tx_dropped);
                return NETDEV_TX_OK;
        }
 
@@ -3430,7 +3430,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
        skb = macsec_encrypt(skb, dev);
        if (IS_ERR(skb)) {
                if (PTR_ERR(skb) != -EINPROGRESS)
-                       dev->stats.tx_dropped++;
+                       DEV_STATS_INC(dev, tx_dropped);
                return NETDEV_TX_OK;
        }
 
@@ -3667,9 +3667,9 @@ static void macsec_get_stats64(struct net_device *dev,
 
        dev_fetch_sw_netstats(s, dev->tstats);
 
-       s->rx_dropped = dev->stats.rx_dropped;
-       s->tx_dropped = dev->stats.tx_dropped;
-       s->rx_errors = dev->stats.rx_errors;
+       s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped);
+       s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped);
+       s->rx_errors = atomic_long_read(&dev->stats.__rx_errors);
 }
 
 static int macsec_get_iflink(const struct net_device *dev)
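
The macsec conversion swaps plain counter increments for DEV_STATS_INC(), which bumps an atomic_long_t so concurrent RX/TX paths cannot lose updates, and macsec_get_stats64() reads the counters back with atomic_long_read(). A userspace sketch of the same idea using C11 atomics; the struct and macro here are illustrative, not the netdevice definitions:

#include <stdatomic.h>
#include <stdio.h>

struct dev_stats {
	atomic_long rx_dropped;   /* the kernel field is an atomic_long_t */
};

#define DEV_STATS_INC(s, f) atomic_fetch_add(&(s)->f, 1)

int main(void)
{
	struct dev_stats st = { 0 };

	DEV_STATS_INC(&st, rx_dropped);   /* safe from concurrent contexts */
	printf("rx_dropped=%ld\n", atomic_load(&st.rx_dropped));
	return 0;
}
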
index 4a53deb..ed90816 100644 (file)
@@ -1746,6 +1746,7 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
        [IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 },
        [IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 },
        [IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT },
+       [IFLA_MACVLAN_BC_CUTOFF] = { .type = NLA_S32 },
 };
 
 int macvlan_link_register(struct rtnl_link_ops *ops)
index b839325..81b7748 100644 (file)
@@ -186,7 +186,7 @@ int mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, int reg)
        struct mdiobb_ctrl *ctrl = bus->priv;
 
        mdiobb_cmd_addr(ctrl, phy, devad, reg);
-       mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg);
+       mdiobb_cmd(ctrl, MDIO_C45_READ, phy, devad);
 
        return mdiobb_read_common(bus, phy);
 }
@@ -222,7 +222,7 @@ int mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, int reg, u16 val)
        struct mdiobb_ctrl *ctrl = bus->priv;
 
        mdiobb_cmd_addr(ctrl, phy, devad, reg);
-       mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg);
+       mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, devad);
 
        return mdiobb_write_common(bus, val);
 }
index 6045bec..b4d3b9c 100644 (file)
@@ -184,13 +184,10 @@ static ssize_t nsim_dev_trap_fa_cookie_write(struct file *file,
        cookie_len = (count - 1) / 2;
        if ((count - 1) % 2)
                return -EINVAL;
-       buf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN);
-       if (!buf)
-               return -ENOMEM;
 
-       ret = simple_write_to_buffer(buf, count, ppos, data, count);
-       if (ret < 0)
-               goto free_buf;
+       buf = memdup_user(data, count);
+       if (IS_ERR(buf))
+               return PTR_ERR(buf);
 
        fa_cookie = kmalloc(sizeof(*fa_cookie) + cookie_len,
                            GFP_KERNEL | __GFP_NOWARN);
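
memdup_user() collapses the old allocate-then-copy sequence into one call that returns an ERR_PTR on failure. The general shape of the pattern, as a kernel-context sketch; user_ptr and count stand for the write() arguments and the kfree() placement is illustrative:

	char *buf = memdup_user(user_ptr, count);
	if (IS_ERR(buf))
		return PTR_ERR(buf);   /* -ENOMEM or -EFAULT */
	/* ... parse buf ... */
	kfree(buf);
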
index 323bec5..3560991 100644 (file)
@@ -313,15 +313,21 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np)
 
        pdev = of_find_device_by_node(pcs_np);
        of_node_put(pcs_np);
-       if (!pdev || !platform_get_drvdata(pdev))
+       if (!pdev || !platform_get_drvdata(pdev)) {
+               if (pdev)
+                       put_device(&pdev->dev);
                return ERR_PTR(-EPROBE_DEFER);
+       }
 
        miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL);
-       if (!miic_port)
+       if (!miic_port) {
+               put_device(&pdev->dev);
                return ERR_PTR(-ENOMEM);
+       }
 
        miic = platform_get_drvdata(pdev);
        device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER);
+       put_device(&pdev->dev);
 
        miic_port->miic = miic;
        miic_port->port = port - 1;
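
The miic fix addresses a device reference leak: of_find_device_by_node() returns its platform device with an elevated refcount, so every exit path, including the success path once the driver data is in hand, must drop it. The pattern, sketched for kernel context:

	struct platform_device *pdev = of_find_device_by_node(np);

	if (!pdev)
		return ERR_PTR(-EPROBE_DEFER);
	/* ... use platform_get_drvdata(pdev) ... */
	put_device(&pdev->dev);   /* balance the implicit get */
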
index c1f307d..8a77ec3 100644 (file)
@@ -459,21 +459,27 @@ static int at803x_set_wol(struct phy_device *phydev,
                        phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
                                      mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
 
-               /* Enable WOL function */
-               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
-                               0, AT803X_WOL_EN);
-               if (ret)
-                       return ret;
+               /* Enable WOL function for 1588 */
+               if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+                       ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                            AT803X_PHY_MMD3_WOL_CTRL,
+                                            0, AT803X_WOL_EN);
+                       if (ret)
+                               return ret;
+               }
                /* Enable WOL interrupt */
                ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
                if (ret)
                        return ret;
        } else {
-               /* Disable WoL function */
-               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
-                               AT803X_WOL_EN, 0);
-               if (ret)
-                       return ret;
+               /* Disable WoL function for 1588 */
+               if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+                       ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                            AT803X_PHY_MMD3_WOL_CTRL,
+                                            AT803X_WOL_EN, 0);
+                       if (ret)
+                               return ret;
+               }
                /* Disable WOL interrupt */
                ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
                if (ret)
@@ -508,11 +514,11 @@ static void at803x_get_wol(struct phy_device *phydev,
        wol->supported = WAKE_MAGIC;
        wol->wolopts = 0;
 
-       value = phy_read_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL);
+       value = phy_read(phydev, AT803X_INTR_ENABLE);
        if (value < 0)
                return;
 
-       if (value & AT803X_WOL_EN)
+       if (value & AT803X_INTR_ENABLE_WOL)
                wol->wolopts |= WAKE_MAGIC;
 }
 
@@ -858,9 +864,6 @@ static int at803x_probe(struct phy_device *phydev)
        if (phydev->drv->phy_id == ATH8031_PHY_ID) {
                int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
                int mode_cfg;
-               struct ethtool_wolinfo wol = {
-                       .wolopts = 0,
-               };
 
                if (ccr < 0)
                        return ccr;
@@ -877,12 +880,14 @@ static int at803x_probe(struct phy_device *phydev)
                        break;
                }
 
-               /* Disable WOL by default */
-               ret = at803x_set_wol(phydev, &wol);
-               if (ret < 0) {
-                       phydev_err(phydev, "failed to disable WOL on probe: %d\n", ret);
+               /* Disable WoL in the 1588 register, which is enabled
+                * by default.
+                */
+               ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+                                    AT803X_PHY_MMD3_WOL_CTRL,
+                                    AT803X_WOL_EN, 0);
+               if (ret)
                        return ret;
-               }
        }
 
        return 0;
@@ -2059,8 +2064,6 @@ static struct phy_driver at803x_driver[] = {
        .flags                  = PHY_POLL_CABLE_TEST,
        .config_init            = at803x_config_init,
        .link_change_notify     = at803x_link_change_notify,
-       .set_wol                = at803x_set_wol,
-       .get_wol                = at803x_get_wol,
        .suspend                = at803x_suspend,
        .resume                 = at803x_resume,
        /* PHY_BASIC_FEATURES */
index 59cae0d..04b2e6e 100644 (file)
@@ -542,6 +542,17 @@ static int bcm54xx_resume(struct phy_device *phydev)
        return bcm54xx_config_init(phydev);
 }
 
+static int bcm54810_read_mmd(struct phy_device *phydev, int devnum, u16 regnum)
+{
+       return -EOPNOTSUPP;
+}
+
+static int bcm54810_write_mmd(struct phy_device *phydev, int devnum, u16 regnum,
+                             u16 val)
+{
+       return -EOPNOTSUPP;
+}
+
 static int bcm54811_config_init(struct phy_device *phydev)
 {
        int err, reg;
@@ -1103,6 +1114,8 @@ static struct phy_driver broadcom_drivers[] = {
        .get_strings    = bcm_phy_get_strings,
        .get_stats      = bcm54xx_get_stats,
        .probe          = bcm54xx_phy_probe,
+       .read_mmd       = bcm54810_read_mmd,
+       .write_mmd      = bcm54810_write_mmd,
        .config_init    = bcm54xx_config_init,
        .config_aneg    = bcm5481_config_aneg,
        .config_intr    = bcm_phy_config_intr,
index 55d9d7a..d4bb90d 100644 (file)
@@ -328,6 +328,13 @@ static int mv3310_power_up(struct phy_device *phydev)
        ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL,
                                 MV_V2_PORT_CTRL_PWRDOWN);
 
+       /* Sometimes, the power down bit doesn't clear immediately, and
+        * a read of this register causes the bit not to clear. Delay
+        * 100us to allow the PHY to come out of power down mode before
+        * the next access.
+        */
+       udelay(100);
+
        if (phydev->drv->phy_id != MARVELL_PHY_ID_88X3310 ||
            priv->firmware_ver < 0x00030000)
                return ret;
index bdf00b2..a9ecfdd 100644 (file)
@@ -1184,9 +1184,11 @@ void phy_stop_machine(struct phy_device *phydev)
 
 static void phy_process_error(struct phy_device *phydev)
 {
-       mutex_lock(&phydev->lock);
+       /* phydev->lock must be held for the state change to be safe */
+       if (!mutex_is_locked(&phydev->lock))
+               phydev_err(phydev, "PHY-device data unsafe context\n");
+
        phydev->state = PHY_ERROR;
-       mutex_unlock(&phydev->lock);
 
        phy_trigger_machine(phydev);
 }
@@ -1195,7 +1197,9 @@ static void phy_error_precise(struct phy_device *phydev,
                              const void *func, int err)
 {
        WARN(1, "%pS: returned: %d\n", func, err);
+       mutex_lock(&phydev->lock);
        phy_process_error(phydev);
+       mutex_unlock(&phydev->lock);
 }
 
 /**
@@ -1204,8 +1208,7 @@ static void phy_error_precise(struct phy_device *phydev,
  *
  * Moves the PHY to the ERROR state in response to a read
  * or write error, and tells the controller the link is down.
- * Must not be called from interrupt context, or while the
- * phydev->lock is held.
+ * Must be called with phydev->lock held.
  */
 void phy_error(struct phy_device *phydev)
 {
index 0c2014a..c7cf61f 100644 (file)
@@ -3216,6 +3216,8 @@ static int phy_probe(struct device *dev)
                        goto out;
        }
 
+       phy_disable_interrupts(phydev);
+
        /* Start out supporting everything. Eventually,
         * a controller will attach, and may modify one
         * or both of these values
@@ -3333,16 +3335,6 @@ static int phy_remove(struct device *dev)
        return 0;
 }
 
-static void phy_shutdown(struct device *dev)
-{
-       struct phy_device *phydev = to_phy_device(dev);
-
-       if (phydev->state == PHY_READY || !phydev->attached_dev)
-               return;
-
-       phy_disable_interrupts(phydev);
-}
-
 /**
  * phy_driver_register - register a phy_driver with the PHY layer
  * @new_driver: new phy_driver to register
@@ -3376,7 +3368,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
        new_driver->mdiodrv.driver.bus = &mdio_bus_type;
        new_driver->mdiodrv.driver.probe = phy_probe;
        new_driver->mdiodrv.driver.remove = phy_remove;
-       new_driver->mdiodrv.driver.shutdown = phy_shutdown;
        new_driver->mdiodrv.driver.owner = owner;
        new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
 
@@ -3451,23 +3442,30 @@ static int __init phy_init(void)
 {
        int rc;
 
+       ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
+
        rc = mdio_bus_init();
        if (rc)
-               return rc;
+               goto err_ethtool_phy_ops;
 
-       ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
        features_init();
 
        rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
        if (rc)
-               goto err_c45;
+               goto err_mdio_bus;
 
        rc = phy_driver_register(&genphy_driver, THIS_MODULE);
-       if (rc) {
-               phy_driver_unregister(&genphy_c45_driver);
+       if (rc)
+               goto err_c45;
+
+       return 0;
+
 err_c45:
-               mdio_bus_exit();
-       }
+       phy_driver_unregister(&genphy_c45_driver);
+err_mdio_bus:
+       mdio_bus_exit();
+err_ethtool_phy_ops:
+       ethtool_set_ethtool_phy_ops(NULL);
 
        return rc;
 }
index e8dd47b..208a939 100644 (file)
@@ -258,6 +258,16 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
        switch (id->base.extended_cc) {
        case SFF8024_ECC_UNSPEC:
                break;
+       case SFF8024_ECC_100G_25GAUI_C2M_AOC:
+               if (br_min <= 28000 && br_max >= 25000) {
+                       /* 25GBASE-R, possibly with FEC */
+                       __set_bit(PHY_INTERFACE_MODE_25GBASER, interfaces);
+                       /* There is currently no link mode for 25000base
+                        * with an unspecified range; reuse SR.
+                        */
+                       phylink_set(modes, 25000baseSR_Full);
+               }
+               break;
        case SFF8024_ECC_100GBASE_SR4_25GBASE_SR:
                phylink_set(modes, 100000baseSR4_Full);
                phylink_set(modes, 25000baseSR_Full);
index 9137fb8..49d1d6a 100644 (file)
@@ -534,7 +534,7 @@ static int tap_open(struct inode *inode, struct file *file)
        q->sock.state = SS_CONNECTED;
        q->sock.file = file;
        q->sock.ops = &tap_socket_ops;
-       sock_init_data_uid(&q->sock, &q->sk, inode->i_uid);
+       sock_init_data_uid(&q->sock, &q->sk, current_fsuid());
        q->sk.sk_write_space = tap_sock_write_space;
        q->sk.sk_destruct = tap_sock_destruct;
        q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
index 555b0b1..382756c 100644 (file)
@@ -2135,6 +2135,15 @@ static void team_setup_by_port(struct net_device *dev,
        dev->mtu = port_dev->mtu;
        memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);
        eth_hw_addr_inherit(dev, port_dev);
+
+       if (port_dev->flags & IFF_POINTOPOINT) {
+               dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+               dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
+       } else if ((port_dev->flags & (IFF_BROADCAST | IFF_MULTICAST)) ==
+                   (IFF_BROADCAST | IFF_MULTICAST)) {
+               dev->flags |= (IFF_BROADCAST | IFF_MULTICAST);
+               dev->flags &= ~(IFF_POINTOPOINT | IFF_NOARP);
+       }
 }
 
 static int team_dev_type_check_change(struct net_device *dev,
@@ -2191,7 +2200,9 @@ static void team_setup(struct net_device *dev)
 
        dev->hw_features = TEAM_VLAN_FEATURES |
                           NETIF_F_HW_VLAN_CTAG_RX |
-                          NETIF_F_HW_VLAN_CTAG_FILTER;
+                          NETIF_F_HW_VLAN_CTAG_FILTER |
+                          NETIF_F_HW_VLAN_STAG_RX |
+                          NETIF_F_HW_VLAN_STAG_FILTER;
 
        dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
        dev->features |= dev->hw_features;
index d75456a..100339b 100644 (file)
@@ -1594,7 +1594,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
        if (zerocopy)
                return false;
 
-       if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+       if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
            SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
                return false;
 
@@ -3469,7 +3469,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
        tfile->socket.file = file;
        tfile->socket.ops = &tun_socket_ops;
 
-       sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid);
+       sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
 
        tfile->sk.sk_write_space = tun_sock_write_space;
        tfile->sk.sk_sndbuf = INT_MAX;
index c00a89b..6d61052 100644 (file)
@@ -618,6 +618,13 @@ static const struct usb_device_id  products[] = {
        .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
                          | USB_DEVICE_ID_MATCH_DEVICE,
        .idVendor               = 0x04DD,
+       .idProduct              = 0x8005,   /* A-300 */
+       ZAURUS_FAKE_INTERFACE,
+       .driver_info        = 0,
+}, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
        .idProduct              = 0x8006,       /* B-500/SL-5600 */
        ZAURUS_MASTER_INTERFACE,
        .driver_info            = 0,
@@ -625,11 +632,25 @@ static const struct usb_device_id products[] = {
        .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
                          | USB_DEVICE_ID_MATCH_DEVICE,
        .idVendor               = 0x04DD,
+       .idProduct              = 0x8006,   /* B-500/SL-5600 */
+       ZAURUS_FAKE_INTERFACE,
+       .driver_info        = 0,
+}, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
        .idProduct              = 0x8007,       /* C-700 */
        ZAURUS_MASTER_INTERFACE,
        .driver_info            = 0,
 }, {
        .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
+       .idProduct              = 0x8007,   /* C-700 */
+       ZAURUS_FAKE_INTERFACE,
+       .driver_info        = 0,
+}, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
                 | USB_DEVICE_ID_MATCH_DEVICE,
        .idVendor               = 0x04DD,
        .idProduct              = 0x9031,       /* C-750 C-760 */
index c458c03..59cde06 100644 (file)
@@ -4224,8 +4224,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
        if (!dev)
                return;
 
-       set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
-
        netif_napi_del(&dev->napi);
 
        udev = interface_to_usbdev(intf);
@@ -4233,6 +4231,8 @@ static void lan78xx_disconnect(struct usb_interface *intf)
 
        unregister_netdev(net);
 
+       timer_shutdown_sync(&dev->stat_monitor);
+       set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
        cancel_delayed_work_sync(&dev->wq);
 
        phydev = net->phydev;
@@ -4247,9 +4247,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
 
        usb_scuttle_anchored_urbs(&dev->deferred);
 
-       if (timer_pending(&dev->stat_monitor))
-               del_timer_sync(&dev->stat_monitor);
-
        lan78xx_unbind(dev, intf);
 
        lan78xx_free_tx_resources(dev);
index 417f7ea..344af3c 100644 (file)
@@ -1423,6 +1423,7 @@ static const struct usb_device_id products[] = {
        {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
        {QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */
        {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},    /* Quectel BG96 */
+       {QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */
        {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
        {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)},    /* Foxconn T77W968 LTE */
        {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)},    /* Foxconn T77W968 LTE with eSIM support*/
index 283ffdd..2d14b0d 100644 (file)
@@ -1775,6 +1775,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
        } else if (!info->in || !info->out)
                status = usbnet_get_endpoints (dev, udev);
        else {
+               u8 ep_addrs[3] = {
+                       info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0
+               };
+
                dev->in = usb_rcvbulkpipe (xdev, info->in);
                dev->out = usb_sndbulkpipe (xdev, info->out);
                if (!(info->flags & FLAG_NO_SETINT))
@@ -1784,6 +1788,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
                else
                        status = 0;
 
+               if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs))
+                       status = -EINVAL;
        }
        if (status >= 0 && dev->status)
                status = init_status (dev, udev);
index 7984f21..df3617c 100644 (file)
@@ -289,11 +289,25 @@ static const struct usb_device_id products [] = {
        .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
                          | USB_DEVICE_ID_MATCH_DEVICE,
        .idVendor               = 0x04DD,
+       .idProduct              = 0x8005,       /* A-300 */
+       ZAURUS_FAKE_INTERFACE,
+       .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
        .idProduct              = 0x8006,       /* B-500/SL-5600 */
        ZAURUS_MASTER_INTERFACE,
        .driver_info = ZAURUS_PXA_INFO,
 }, {
        .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
+       .idProduct              = 0x8006,       /* B-500/SL-5600 */
+       ZAURUS_FAKE_INTERFACE,
+       .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
                  | USB_DEVICE_ID_MATCH_DEVICE,
        .idVendor               = 0x04DD,
        .idProduct              = 0x8007,       /* C-700 */
@@ -301,6 +315,13 @@ static const struct usb_device_id  products [] = {
        .driver_info = ZAURUS_PXA_INFO,
 }, {
        .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
+       .idProduct              = 0x8007,       /* C-700 */
+       ZAURUS_FAKE_INTERFACE,
+       .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
                 | USB_DEVICE_ID_MATCH_DEVICE,
        .idVendor               = 0x04DD,
        .idProduct              = 0x9031,       /* C-750 C-760 */
index 614f3e3..ef8eacb 100644 (file)
@@ -1081,8 +1081,9 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
 err_xdp_ring:
        for (i--; i >= start; i--)
                ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+       i = end;
 err_page_pool:
-       for (i = start; i < end; i++) {
+       for (i--; i >= start; i--) {
                page_pool_destroy(priv->rq[i].page_pool);
                priv->rq[i].page_pool = NULL;
        }
@@ -1860,10 +1861,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 
                nla_peer = data[VETH_INFO_PEER];
                ifmp = nla_data(nla_peer);
-               err = rtnl_nla_parse_ifla(peer_tb,
-                                         nla_data(nla_peer) + sizeof(struct ifinfomsg),
-                                         nla_len(nla_peer) - sizeof(struct ifinfomsg),
-                                         NULL);
+               err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
                if (err < 0)
                        return err;
 
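
The veth unwind fix is the classic error-path rule: tear down only what was actually set up. The broken loop destroyed page pools for the whole [start, end) range even when creation failed partway through. A runnable model of the corrected pattern, with an injected failure for demonstration:

#include <stdio.h>
#include <stdlib.h>

static int init_range(void **objs, int start, int end)
{
	int i;

	for (i = start; i < end; i++) {
		objs[i] = malloc(16);
		if (!objs[i] || i == 2)   /* injected failure at i == 2 */
			goto err;
	}
	return 0;
err:
	free(objs[i]);                    /* the slot that failed (may be NULL) */
	for (i--; i >= start; i--)        /* unwind created entries only */
		free(objs[i]);
	return -1;
}

int main(void)
{
	void *objs[4] = { 0 };

	printf("rc=%d\n", init_range(objs, 0, 4));
	return 0;
}
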
index 0db14f6..8e9f4cf 100644 (file)
@@ -2761,7 +2761,7 @@ static void virtnet_init_default_rss(struct virtnet_info *vi)
                vi->ctrl->rss.indirection_table[i] = indir_val;
        }
 
-       vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs;
+       vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0;
        vi->ctrl->rss.hash_key_length = vi->rss_key_size;
 
        netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
@@ -4231,6 +4231,8 @@ static int virtnet_probe(struct virtio_device *vdev)
 
        virtio_device_ready(vdev);
 
+       _virtnet_set_queues(vi, vi->curr_queue_pairs);
+
        /* a random MAC address has been assigned, notify the device.
         * We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
         * because many devices work fine without getting MAC explicitly
@@ -4257,8 +4259,6 @@ static int virtnet_probe(struct virtio_device *vdev)
                goto free_unregister_netdev;
        }
 
-       virtnet_set_queues(vi, vi->curr_queue_pairs);
-
        /* Assume link up if device can't report link status,
           otherwise get link status from config. */
        netif_carrier_off(dev);
index bdb3a76..6043e63 100644 (file)
@@ -664,7 +664,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;
 
-       rcu_read_lock_bh();
+       rcu_read_lock();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
@@ -672,10 +672,10 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
-               rcu_read_unlock_bh();
+               rcu_read_unlock();
                return ret;
        }
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
 
        IP6_INC_STATS(dev_net(dst->dev),
                      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -889,7 +889,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
                }
        }
 
-       rcu_read_lock_bh();
+       rcu_read_lock();
 
        neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
        if (!IS_ERR(neigh)) {
@@ -898,11 +898,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
                sock_confirm_neigh(skb, neigh);
                /* if crossing protocols, can not use the cached header */
                ret = neigh_output(neigh, skb, is_v6gw);
-               rcu_read_unlock_bh();
+               rcu_read_unlock();
                return ret;
        }
 
-       rcu_read_unlock_bh();
+       rcu_read_unlock();
        vrf_tx_error(skb->dev, skb);
        return -EINVAL;
 }
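Both VRF output paths drop the _bh lock variant. The neighbour helpers used here (__ipv6_neigh_lookup_noref(), ip_neigh_for_gw(), neigh_output()) only require an RCU read-side critical section, so plain rcu_read_lock() is sufficient; disabling bottom halves on top of that is unnecessary for the lookup itself. Condensed from the hunks above, the shape of the pattern, with the one ordering rule that matters noted inline:

	rcu_read_lock();			/* was: rcu_read_lock_bh() */
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	/* ... */
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb, false);
		rcu_read_unlock();		/* only after the last use of neigh */
		return ret;
	}
	rcu_read_unlock();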
index 7874454..c9a9373 100644 (file)
@@ -623,6 +623,32 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
        return 1;
 }
 
+static bool vxlan_parse_gpe_proto(struct vxlanhdr *hdr, __be16 *protocol)
+{
+       struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)hdr;
+
+       /* Need to have Next Protocol set for interfaces in GPE mode. */
+       if (!gpe->np_applied)
+               return false;
+       /* "The initial version is 0. If a receiver does not support the
+        * version indicated it MUST drop the packet.
+        */
+       if (gpe->version != 0)
+               return false;
+       /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+        * processing MUST occur." However, we don't implement OAM
+        * processing, thus drop the packet.
+        */
+       if (gpe->oam_flag)
+               return false;
+
+       *protocol = tun_p_to_eth_p(gpe->next_protocol);
+       if (!*protocol)
+               return false;
+
+       return true;
+}
+
 static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
                                          unsigned int off,
                                          struct vxlanhdr *vh, size_t hdrlen,
@@ -649,26 +675,24 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
        return vh;
 }
 
-static struct sk_buff *vxlan_gro_receive(struct sock *sk,
-                                        struct list_head *head,
-                                        struct sk_buff *skb)
+static struct vxlanhdr *vxlan_gro_prepare_receive(struct sock *sk,
+                                                 struct list_head *head,
+                                                 struct sk_buff *skb,
+                                                 struct gro_remcsum *grc)
 {
-       struct sk_buff *pp = NULL;
        struct sk_buff *p;
        struct vxlanhdr *vh, *vh2;
        unsigned int hlen, off_vx;
-       int flush = 1;
        struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
        __be32 flags;
-       struct gro_remcsum grc;
 
-       skb_gro_remcsum_init(&grc);
+       skb_gro_remcsum_init(grc);
 
        off_vx = skb_gro_offset(skb);
        hlen = off_vx + sizeof(*vh);
        vh = skb_gro_header(skb, hlen, off_vx);
        if (unlikely(!vh))
-               goto out;
+               return NULL;
 
        skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
 
@@ -676,12 +700,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
 
        if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
                vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
-                                      vh->vx_vni, &grc,
+                                      vh->vx_vni, grc,
                                       !!(vs->flags &
                                          VXLAN_F_REMCSUM_NOPARTIAL));
 
                if (!vh)
-                       goto out;
+                       return NULL;
        }
 
        skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
@@ -698,12 +722,48 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
                }
        }
 
-       pp = call_gro_receive(eth_gro_receive, head, skb);
-       flush = 0;
+       return vh;
+}
 
-out:
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+                                        struct list_head *head,
+                                        struct sk_buff *skb)
+{
+       struct sk_buff *pp = NULL;
+       struct gro_remcsum grc;
+       int flush = 1;
+
+       if (vxlan_gro_prepare_receive(sk, head, skb, &grc)) {
+               pp = call_gro_receive(eth_gro_receive, head, skb);
+               flush = 0;
+       }
        skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
+       return pp;
+}
+
+static struct sk_buff *vxlan_gpe_gro_receive(struct sock *sk,
+                                            struct list_head *head,
+                                            struct sk_buff *skb)
+{
+       const struct packet_offload *ptype;
+       struct sk_buff *pp = NULL;
+       struct gro_remcsum grc;
+       struct vxlanhdr *vh;
+       __be16 protocol;
+       int flush = 1;
 
+       vh = vxlan_gro_prepare_receive(sk, head, skb, &grc);
+       if (vh) {
+               if (!vxlan_parse_gpe_proto(vh, &protocol))
+                       goto out;
+               ptype = gro_find_receive_by_type(protocol);
+               if (!ptype)
+                       goto out;
+               pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+               flush = 0;
+       }
+out:
+       skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
        return pp;
 }
 
@@ -715,6 +775,21 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
        return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
+static int vxlan_gpe_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
+{
+       struct vxlanhdr *vh = (struct vxlanhdr *)(skb->data + nhoff);
+       const struct packet_offload *ptype;
+       int err = -ENOSYS;
+       __be16 protocol;
+
+       if (!vxlan_parse_gpe_proto(vh, &protocol))
+               return err;
+       ptype = gro_find_complete_by_type(protocol);
+       if (ptype)
+               err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
+       return err;
+}
+
 static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
                                         __u16 state, __be32 src_vni,
                                         __u16 ndm_flags)
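vxlan_parse_gpe_proto() is hoisted to file scope so the new GPE-specific GRO hooks can share it with the receive path (the old copy, vxlan_parse_gpe_hdr(), is deleted in a later hunk). The point of the split: GPE frames do not necessarily carry inner Ethernet, so vxlan_gpe_gro_receive() resolves the next-protocol field and dispatches through gro_find_receive_by_type() instead of hard-coding eth_gro_receive(). A compact illustration of the parse and the protocol mapping; the bit layout is simplified (the kernel handles endianness) and the mapping table mirrors the VXLAN-GPE next-protocol assignments only for illustration:

#include <stdbool.h>
#include <stdint.h>

/* simplified flags/next-protocol view of a VXLAN-GPE header */
struct gpe_hdr {
	uint8_t version:2, np_applied:1, oam_flag:1;
	uint8_t next_protocol;	/* 0x01 IPv4, 0x02 IPv6, 0x03 Ethernet */
};

static uint16_t tun_p_to_eth(uint8_t np)
{
	switch (np) {
	case 0x01: return 0x0800;	/* ETH_P_IP */
	case 0x02: return 0x86DD;	/* ETH_P_IPV6 */
	case 0x03: return 0x6558;	/* ETH_P_TEB, inner Ethernet */
	default:   return 0;		/* unknown: caller drops the packet */
	}
}

static bool parse_gpe_proto(const struct gpe_hdr *gpe, uint16_t *proto)
{
	/* no Next Protocol, wrong version, or OAM: drop */
	if (!gpe->np_applied || gpe->version != 0 || gpe->oam_flag)
		return false;
	*proto = tun_p_to_eth(gpe->next_protocol);
	return *proto != 0;
}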
@@ -1525,35 +1600,6 @@ out:
        unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
 }
 
-static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
-                               __be16 *protocol,
-                               struct sk_buff *skb, u32 vxflags)
-{
-       struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
-
-       /* Need to have Next Protocol set for interfaces in GPE mode. */
-       if (!gpe->np_applied)
-               return false;
-       /* "The initial version is 0. If a receiver does not support the
-        * version indicated it MUST drop the packet.
-        */
-       if (gpe->version != 0)
-               return false;
-       /* "When the O bit is set to 1, the packet is an OAM packet and OAM
-        * processing MUST occur." However, we don't implement OAM
-        * processing, thus drop the packet.
-        */
-       if (gpe->oam_flag)
-               return false;
-
-       *protocol = tun_p_to_eth_p(gpe->next_protocol);
-       if (!*protocol)
-               return false;
-
-       unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
-       return true;
-}
-
 static bool vxlan_set_mac(struct vxlan_dev *vxlan,
                          struct vxlan_sock *vs,
                          struct sk_buff *skb, __be32 vni)
@@ -1655,8 +1701,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
         * used by VXLAN extensions if explicitly requested.
         */
        if (vs->flags & VXLAN_F_GPE) {
-               if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+               if (!vxlan_parse_gpe_proto(&unparsed, &protocol))
                        goto drop;
+               unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS;
                raw_proto = true;
        }
 
@@ -2516,7 +2563,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                }
 
                ndst = &rt->dst;
-               err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+               err = skb_tunnel_check_pmtu(skb, ndst, vxlan_headroom(flags & VXLAN_F_GPE),
                                            netif_is_any_bridge_port(dev));
                if (err < 0) {
                        goto tx_error;
@@ -2577,7 +2624,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
                                goto out_unlock;
                }
 
-               err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+               err = skb_tunnel_check_pmtu(skb, ndst,
+                                           vxlan_headroom((flags & VXLAN_F_GPE) | VXLAN_F_IPV6),
                                            netif_is_any_bridge_port(dev));
                if (err < 0) {
                        goto tx_error;
@@ -2989,14 +3037,12 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
        struct vxlan_rdst *dst = &vxlan->default_dst;
        struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
                                                         dst->remote_ifindex);
-       bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
 
        /* This check is different than dev->max_mtu, because it looks at
         * the lowerdev->mtu, rather than the static dev->max_mtu
         */
        if (lowerdev) {
-               int max_mtu = lowerdev->mtu -
-                             (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+               int max_mtu = lowerdev->mtu - vxlan_headroom(vxlan->cfg.flags);
                if (new_mtu > max_mtu)
                        return -EINVAL;
        }
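The PMTU and MTU hunks above collapse three open-coded headroom computations into one vxlan_headroom() helper keyed on the flag word: IPv6 versus IPv4 outer header, and whether an inner Ethernet header is present at all (GPE carries raw payloads). The metadata-collection case, handled in a later hunk, forces the IPv6 size because the outer family is only known per packet. The helper itself is introduced elsewhere in this series; a sketch of what it has to account for, using the usual header sizes and placeholder flag bits rather than the kernel macro:

#include <stdint.h>

#define F_GPE	(1u << 0)	/* placeholder flag bits */
#define F_IPV6	(1u << 1)

static unsigned int vxlan_headroom_sketch(uint32_t flags)
{
	unsigned int room = 8 /* UDP */ + 8 /* VXLAN */;

	room += (flags & F_IPV6) ? 40 : 20;	/* outer IPv6 vs IPv4 */
	if (!(flags & F_GPE))
		room += 14;			/* inner Ethernet header */
	return room;
}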
@@ -3379,8 +3425,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
        tunnel_cfg.encap_rcv = vxlan_rcv;
        tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
        tunnel_cfg.encap_destroy = NULL;
-       tunnel_cfg.gro_receive = vxlan_gro_receive;
-       tunnel_cfg.gro_complete = vxlan_gro_complete;
+       if (vs->flags & VXLAN_F_GPE) {
+               tunnel_cfg.gro_receive = vxlan_gpe_gro_receive;
+               tunnel_cfg.gro_complete = vxlan_gpe_gro_complete;
+       } else {
+               tunnel_cfg.gro_receive = vxlan_gro_receive;
+               tunnel_cfg.gro_complete = vxlan_gro_complete;
+       }
 
        setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
 
@@ -3644,11 +3695,11 @@ static void vxlan_config_apply(struct net_device *dev,
        struct vxlan_dev *vxlan = netdev_priv(dev);
        struct vxlan_rdst *dst = &vxlan->default_dst;
        unsigned short needed_headroom = ETH_HLEN;
-       bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
        int max_mtu = ETH_MAX_MTU;
+       u32 flags = conf->flags;
 
        if (!changelink) {
-               if (conf->flags & VXLAN_F_GPE)
+               if (flags & VXLAN_F_GPE)
                        vxlan_raw_setup(dev);
                else
                        vxlan_ether_setup(dev);
@@ -3673,8 +3724,7 @@ static void vxlan_config_apply(struct net_device *dev,
 
                dev->needed_tailroom = lowerdev->needed_tailroom;
 
-               max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
-                                          VXLAN_HEADROOM);
+               max_mtu = lowerdev->mtu - vxlan_headroom(flags);
                if (max_mtu < ETH_MIN_MTU)
                        max_mtu = ETH_MIN_MTU;
 
@@ -3685,10 +3735,9 @@ static void vxlan_config_apply(struct net_device *dev,
        if (dev->mtu > max_mtu)
                dev->mtu = max_mtu;
 
-       if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
-               needed_headroom += VXLAN6_HEADROOM;
-       else
-               needed_headroom += VXLAN_HEADROOM;
+       if (flags & VXLAN_F_COLLECT_METADATA)
+               flags |= VXLAN_F_IPV6;
+       needed_headroom += vxlan_headroom(flags);
        dev->needed_headroom = needed_headroom;
 
        memcpy(&vxlan->cfg, conf, sizeof(*conf));
index a3de081..c3ff30a 100644 (file)
@@ -713,6 +713,12 @@ static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
        return vninode;
 }
 
+static void vxlan_vni_free(struct vxlan_vni_node *vninode)
+{
+       free_percpu(vninode->stats);
+       kfree(vninode);
+}
+
 static int vxlan_vni_add(struct vxlan_dev *vxlan,
                         struct vxlan_vni_group *vg,
                         u32 vni, union vxlan_addr *group,
@@ -740,7 +746,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
                                            &vninode->vnode,
                                            vxlan_vni_rht_params);
        if (err) {
-               kfree(vninode);
+               vxlan_vni_free(vninode);
                return err;
        }
 
@@ -763,8 +769,7 @@ static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
        struct vxlan_vni_node *v;
 
        v = container_of(rcu, struct vxlan_vni_node, rcu);
-       free_percpu(v->stats);
-       kfree(v);
+       vxlan_vni_free(v);
 }
 
 static int vxlan_vni_del(struct vxlan_dev *vxlan,
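vxlan_vni_alloc() allocates the node plus a separate per-CPU stats block, so every teardown has to free both; the rhashtable insert-failure path previously called kfree() directly and leaked vninode->stats. Centralizing the release in vxlan_vni_free() lets the error path and the RCU callback share one helper. The same defensive shape in a self-contained sketch (placeholder types, not the vxlan structures):

#include <stdlib.h>

struct stats;					/* placeholder types */
struct vni_node { struct stats *stats; };

static void free_stats(struct stats *s) { free(s); }

/* one teardown helper, shared by the insert-failure path and the
 * deferred-free callback, so neither can forget the embedded
 * allocation again */
static void vni_free(struct vni_node *v)
{
	free_stats(v->stats);	/* must precede freeing the node itself */
	free(v);
}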
index 5bf7822..0ba714c 100644 (file)
@@ -6,7 +6,7 @@
 #include "allowedips.h"
 #include "peer.h"
 
-enum { MAX_ALLOWEDIPS_BITS = 128 };
+enum { MAX_ALLOWEDIPS_DEPTH = 129 };
 
 static struct kmem_cache *node_cache;
 
@@ -42,7 +42,7 @@ static void push_rcu(struct allowedips_node **stack,
                     struct allowedips_node __rcu *p, unsigned int *len)
 {
        if (rcu_access_pointer(p)) {
-               if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_BITS))
+               if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_DEPTH))
                        return;
                stack[(*len)++] = rcu_dereference_raw(p);
        }
@@ -55,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu)
 
 static void root_free_rcu(struct rcu_head *rcu)
 {
-       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = {
+       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = {
                container_of(rcu, struct allowedips_node, rcu) };
        unsigned int len = 1;
 
@@ -68,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu)
 
 static void root_remove_peer_lists(struct allowedips_node *root)
 {
-       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { root };
+       struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { root };
        unsigned int len = 1;
 
        while (len > 0 && (node = stack[--len])) {
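The renamed constant fixes an off-by-one: the walk stack has to hold a whole root-to-leaf path of the trie, and over 128-bit keys a path can contain one node per prefix length 0 through 128, i.e. 129 nodes, not 128. A toy computation of the bound, plain C and nothing WireGuard-specific:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	const unsigned key_bits = 128;		/* IPv6 address width */
	unsigned depth = 0;

	/* one trie node can exist per prefix length 0 ... key_bits */
	for (unsigned cidr = 0; cidr <= key_bits; cidr++)
		depth++;

	printf("max nodes on one path: %u\n", depth);	/* prints 129 */
	assert(depth == key_bits + 1);
	return 0;
}

The selftest change in the next file section builds exactly such maximal chains out of 129 distinct /128 insertions, so a stack still sized MAX_ALLOWEDIPS_BITS would trip the WARN_ON.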
index 78ebe28..3d1f64f 100644 (file)
@@ -593,16 +593,20 @@ bool __init wg_allowedips_selftest(void)
        wg_allowedips_remove_by_peer(&t, a, &mutex);
        test_negative(4, a, 192, 168, 0, 1);
 
-       /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_BITS) in free_node
+       /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_DEPTH) in free_node
         * if something goes wrong.
         */
-       for (i = 0; i < MAX_ALLOWEDIPS_BITS; ++i) {
-               part = cpu_to_be64(~(1LLU << (i % 64)));
-               memset(&ip, 0xff, 16);
-               memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+       for (i = 0; i < 64; ++i) {
+               part = cpu_to_be64(~0LLU << i);
+               memset(&ip, 0xff, 8);
+               memcpy((u8 *)&ip + 8, &part, 8);
+               wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+               memcpy(&ip, &part, 8);
+               memset((u8 *)&ip + 8, 0, 8);
                wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
        }
-
+       memset(&ip, 0, 16);
+       wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
        wg_allowedips_free(&t, &mutex);
 
        wg_allowedips_init(&t);
index 1cebba7..139da57 100644 (file)
@@ -376,7 +376,6 @@ static void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab)
                struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
 
                if (!irq_grp->napi_enabled) {
-                       dev_set_threaded(&irq_grp->napi_ndev, true);
                        napi_enable(&irq_grp->napi);
                        irq_grp->napi_enabled = true;
                }
index c899616..c630836 100644 (file)
@@ -466,7 +466,6 @@ void ath11k_pcic_ext_irq_enable(struct ath11k_base *ab)
                struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
 
                if (!irq_grp->napi_enabled) {
-                       dev_set_threaded(&irq_grp->napi_ndev, true);
                        napi_enable(&irq_grp->napi);
                        irq_grp->napi_enabled = true;
                }
index 6512267..4928e4e 100644 (file)
@@ -2144,8 +2144,7 @@ int ath12k_wmi_send_scan_start_cmd(struct ath12k *ar,
        struct wmi_tlv *tlv;
        void *ptr;
        int i, ret, len;
-       u32 *tmp_ptr;
-       u8 extraie_len_with_pad = 0;
+       u32 *tmp_ptr, extraie_len_with_pad = 0;
        struct ath12k_wmi_hint_short_ssid_arg *s_ssid = NULL;
        struct ath12k_wmi_hint_bssid_arg *hint_bssid = NULL;
 
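extraie_len_with_pad is widened from u8 to u32. The value is an IE blob length rounded up for TLV padding, so any extra-IE payload of 256 bytes or more silently wrapped in the old 8-bit variable and corrupted the command sizing. A two-line demonstration of the truncation:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t extraie_len = 300;			/* padded TLV length */
	uint8_t  as_u8  = (uint8_t)extraie_len;		/* old type */
	uint32_t as_u32 = extraie_len;			/* new type */

	printf("u8: %u, u32: %u\n", as_u8, as_u32);	/* 44 vs 300 */
	return 0;
}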
index a75bfa9..dc2b3b4 100644 (file)
@@ -36,11 +36,6 @@ ath6kl_core-y += wmi.o
 ath6kl_core-y += core.o
 ath6kl_core-y += recovery.o
 
-# FIXME: temporarily silence -Wdangling-pointer on non W=1+ builds
-ifndef KBUILD_EXTRA_WARN
-CFLAGS_htc_mbox.o += $(call cc-disable-warning, dangling-pointer)
-endif
-
 ath6kl_core-$(CONFIG_NL80211_TESTMODE) += testmode.o
 ath6kl_core-$(CONFIG_ATH6KL_TRACING) += trace.o
 
index de8a2e2..2a90bb2 100644 (file)
@@ -1456,6 +1456,10 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
                params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
                params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE;
                params_v1 = kzalloc(params_size, GFP_KERNEL);
+               if (!params_v1) {
+                       err = -ENOMEM;
+                       goto exit_params;
+               }
                params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
                brcmf_scan_params_v2_to_v1(&params->params_v2_le, &params_v1->params_le);
                kfree(params);
@@ -1473,6 +1477,7 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
                        bphy_err(drvr, "error (%d)\n", err);
        }
 
+exit_params:
        kfree(params);
 exit:
        return err;
index 792adaf..bece267 100644 (file)
@@ -398,7 +398,12 @@ struct brcmf_scan_params_le {
                                 * fixed parameter portion is assumed, otherwise
                                 * ssid in the fixed portion is ignored
                                 */
-       __le16 channel_list[1]; /* list of chanspecs */
+       union {
+               __le16 padding; /* Reserve space for at least 1 entry for abort
+                                * which uses an on stack brcmf_scan_params_le
+                                */
+               DECLARE_FLEX_ARRAY(__le16, channel_list);       /* chanspecs */
+       };
 };
 
 struct brcmf_scan_params_v2_le {
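The one-element channel_list[1] becomes a proper flexible array, but this struct is also placed on the stack for the scan-abort case, which needs room for one entry. The union with a padding member keeps sizeof() unchanged, and DECLARE_FLEX_ARRAY() exists precisely to make a flexible member legal inside a union. A userspace-compilable analogue of the trick, relying on the same GNU C extensions the kernel macro expands to (names here are illustrative, not the driver's):

#include <stdio.h>
#include <stdint.h>

struct scan_params {
	uint32_t version;
	union {
		uint16_t padding;	/* reserves one slot on the stack */
		struct {
			struct { } __empty;	/* legalizes the flex array */
			uint16_t channel_list[];	/* variable tail */
		};
	};
};

int main(void)
{
	struct scan_params abort_params = { .version = 2 };	/* on-stack use */

	abort_params.channel_list[0] = 0x1001;	/* fits in the padded slot */
	printf("sizeof = %zu\n", sizeof(abort_params));
	return 0;
}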
index 7c4cc5f..dbd13f7 100644 (file)
@@ -6157,8 +6157,11 @@ static int airo_get_rate(struct net_device *dev,
        struct iw_param *vwrq = &wrqu->bitrate;
        struct airo_info *local = dev->ml_priv;
        StatusRid status_rid;           /* Card status info */
+       int ret;
 
-       readStatusRid(local, &status_rid, 1);
+       ret = readStatusRid(local, &status_rid, 1);
+       if (ret)
+               return -EBUSY;
 
        vwrq->value = le16_to_cpu(status_rid.currentXmitRate) * 500000;
        /* If more than one rate, set auto */
index b20409f..2097130 100644 (file)
@@ -66,6 +66,7 @@ config IWLMVM
        tristate "Intel Wireless WiFi MVM Firmware support"
        select WANT_DEV_COREDUMP
        depends on MAC80211
+       depends on PTP_1588_CLOCK_OPTIONAL
        help
          This is the driver that supports the MVM firmware. The list
          of the devices that use this firmware is available here:
index aa4320c..d594694 100644 (file)
@@ -84,7 +84,6 @@ const struct iwl_ht_params iwl_22000_ht_params = {
        .mac_addr_from_csr = 0x380,                                     \
        .ht_params = &iwl_22000_ht_params,                              \
        .nvm_ver = IWL_22000_NVM_VERSION,                               \
-       .trans.use_tfh = true,                                          \
        .trans.rf_id = true,                                            \
        .trans.gen2 = true,                                             \
        .nvm_type = IWL_NVM_EXT,                                        \
@@ -122,7 +121,6 @@ const struct iwl_ht_params iwl_22000_ht_params = {
 
 const struct iwl_cfg_trans_params iwl_qu_trans_cfg = {
        .mq_rx_supported = true,
-       .use_tfh = true,
        .rf_id = true,
        .gen2 = true,
        .device_family = IWL_DEVICE_FAMILY_22000,
@@ -134,7 +132,6 @@ const struct iwl_cfg_trans_params iwl_qu_trans_cfg = {
 
 const struct iwl_cfg_trans_params iwl_qu_medium_latency_trans_cfg = {
        .mq_rx_supported = true,
-       .use_tfh = true,
        .rf_id = true,
        .gen2 = true,
        .device_family = IWL_DEVICE_FAMILY_22000,
@@ -146,7 +143,6 @@ const struct iwl_cfg_trans_params iwl_qu_medium_latency_trans_cfg = {
 
 const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg = {
        .mq_rx_supported = true,
-       .use_tfh = true,
        .rf_id = true,
        .gen2 = true,
        .device_family = IWL_DEVICE_FAMILY_22000,
@@ -200,7 +196,6 @@ const struct iwl_cfg_trans_params iwl_ax200_trans_cfg = {
        .device_family = IWL_DEVICE_FAMILY_22000,
        .base_params = &iwl_22000_base_params,
        .mq_rx_supported = true,
-       .use_tfh = true,
        .rf_id = true,
        .gen2 = true,
        .bisr_workaround = 1,
index 742096c..241a9e3 100644 (file)
@@ -256,7 +256,6 @@ enum iwl_cfg_trans_ltr_delay {
  * @xtal_latency: power up latency to get the xtal stabilized
  * @extra_phy_cfg_flags: extra configuration flags to pass to the PHY
  * @rf_id: need to read rf_id to determine the firmware image
- * @use_tfh: use TFH
  * @gen2: 22000 and on transport operation
  * @mq_rx_supported: multi-queue rx support
  * @integrated: discrete or integrated
@@ -271,7 +270,6 @@ struct iwl_cfg_trans_params {
        u32 xtal_latency;
        u32 extra_phy_cfg_flags;
        u32 rf_id:1,
-           use_tfh:1,
            gen2:1,
            mq_rx_supported:1,
            integrated:1,
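use_tfh is deleted from the transport config because, in every configuration touched here, it was set exactly when gen2 was set, and the following hunks rewrite each trans_cfg->use_tfh test to ->gen2. Keeping a single source of truth removes the possibility of the two bits drifting apart. The idea in miniature, with a made-up struct rather than iwlwifi's:

#include <stdbool.h>
#include <stdint.h>

struct trans_cfg {
	uint32_t rf_id:1,
		 gen2:1,		/* single source of truth */
		 mq_rx_supported:1;
};

/* TFH descriptors are a gen2 feature: derive the answer instead of
 * storing a second flag that can fall out of sync */
static inline bool uses_tfh(const struct trans_cfg *cfg)
{
	return cfg->gen2;
}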
index bedd78a..4e4a60d 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2005-2014, 2018-2021 Intel Corporation
+ * Copyright (C) 2005-2014, 2018-2021, 2023 Intel Corporation
  * Copyright (C) 2015-2017 Intel Deutschland GmbH
  */
 #ifndef __iwl_fh_h__
@@ -71,7 +71,7 @@
 static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans,
                                             unsigned int chnl)
 {
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                WARN_ON_ONCE(chnl >= 64);
                return TFH_TFDQ_CBB_TABLE + 8 * chnl;
        }
index b1af935..4bd7594 100644 (file)
@@ -2,7 +2,7 @@
 /*
  * Copyright (C) 2015 Intel Mobile Communications GmbH
  * Copyright (C) 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2019-2021 Intel Corporation
+ * Copyright (C) 2019-2021, 2023 Intel Corporation
  */
 #include <linux/kernel.h>
 #include <linux/bsearch.h>
@@ -42,7 +42,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
 
        WARN_ON(!ops->wait_txq_empty && !ops->wait_tx_queues_empty);
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                trans->txqs.tfd.addr_size = 64;
                trans->txqs.tfd.max_tbs = IWL_TFH_NUM_TBS;
                trans->txqs.tfd.size = sizeof(struct iwl_tfh_tfd);
@@ -101,7 +101,7 @@ int iwl_trans_init(struct iwl_trans *trans)
 
        /* Some things must not change even if the config does */
        WARN_ON(trans->txqs.tfd.addr_size !=
-               (trans->trans_cfg->use_tfh ? 64 : 36));
+               (trans->trans_cfg->gen2 ? 64 : 36));
 
        snprintf(trans->dev_cmd_pool_name, sizeof(trans->dev_cmd_pool_name),
                 "iwl_cmd_pool:%s", dev_name(trans->dev));
index b83df06..b18c91c 100644 (file)
@@ -1450,7 +1450,7 @@ static inline bool iwl_mvm_has_new_station_api(const struct iwl_fw *fw)
 static inline bool iwl_mvm_has_new_tx_api(struct iwl_mvm *mvm)
 {
        /* TODO - replace with TLV once defined */
-       return mvm->trans->trans_cfg->use_tfh;
+       return mvm->trans->trans_cfg->gen2;
 }
 
 static inline bool iwl_mvm_has_unified_ucode(struct iwl_mvm *mvm)
index eacbbdb..3e988da 100644 (file)
@@ -819,7 +819,7 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans,
 
        iwl_enable_interrupts(trans);
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                if (cpu == 1)
                        iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS,
                                       0xFFFF);
@@ -3394,7 +3394,7 @@ iwl_trans_pcie_dump_data(struct iwl_trans *trans,
                        u8 tfdidx;
                        u32 caplen, cmdlen;
 
-                       if (trans->trans_cfg->use_tfh)
+                       if (trans->trans_cfg->gen2)
                                tfdidx = idx;
                        else
                                tfdidx = ptr;
index 1337fa9..790e5b1 100644 (file)
@@ -364,7 +364,7 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
        for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
             txq_id++) {
                struct iwl_txq *txq = trans->txqs.txq[txq_id];
-               if (trans->trans_cfg->use_tfh)
+               if (trans->trans_cfg->gen2)
                        iwl_write_direct64(trans,
                                           FH_MEM_CBBC_QUEUE(trans, txq_id),
                                           txq->dma_addr);
index fbacbe9..5bb3cc3 100644 (file)
@@ -985,7 +985,7 @@ void iwl_txq_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq)
        bool active;
        u8 fifo;
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                IWL_ERR(trans, "Queue %d is stuck %d %d\n", txq_id,
                        txq->read_ptr, txq->write_ptr);
                /* TODO: access new SCD registers and dump them */
@@ -1040,7 +1040,7 @@ int iwl_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num,
        if (WARN_ON(txq->entries || txq->tfds))
                return -EINVAL;
 
-       if (trans->trans_cfg->use_tfh)
+       if (trans->trans_cfg->gen2)
                tfd_sz = trans->txqs.tfd.size * slots_num;
 
        timer_setup(&txq->stuck_timer, iwl_txq_stuck_timer, 0);
@@ -1347,7 +1347,7 @@ static inline dma_addr_t iwl_txq_gen1_tfd_tb_get_addr(struct iwl_trans *trans,
        dma_addr_t addr;
        dma_addr_t hi_len;
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                struct iwl_tfh_tfd *tfh_tfd = _tfd;
                struct iwl_tfh_tb *tfh_tb = &tfh_tfd->tbs[idx];
 
@@ -1408,7 +1408,7 @@ void iwl_txq_gen1_tfd_unmap(struct iwl_trans *trans,
 
        meta->tbs = 0;
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                struct iwl_tfh_tfd *tfd_fh = (void *)tfd;
 
                tfd_fh->num_tbs = 0;
@@ -1625,7 +1625,7 @@ void iwl_txq_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 
                txq->entries[read_ptr].skb = NULL;
 
-               if (!trans->trans_cfg->use_tfh)
+               if (!trans->trans_cfg->gen2)
                        iwl_txq_gen1_inval_byte_cnt_tbl(trans, txq);
 
                iwl_txq_free_tfd(trans, txq);
index eca53bf..1e4a24a 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
- * Copyright (C) 2020-2022 Intel Corporation
+ * Copyright (C) 2020-2023 Intel Corporation
  */
 #ifndef __iwl_trans_queue_tx_h__
 #define __iwl_trans_queue_tx_h__
@@ -38,7 +38,7 @@ static inline void iwl_wake_queue(struct iwl_trans *trans,
 static inline void *iwl_txq_get_tfd(struct iwl_trans *trans,
                                    struct iwl_txq *txq, int idx)
 {
-       if (trans->trans_cfg->use_tfh)
+       if (trans->trans_cfg->gen2)
                idx = iwl_txq_get_cmd_index(txq, idx);
 
        return (u8 *)txq->tfds + trans->txqs.tfd.size * idx;
@@ -135,7 +135,7 @@ static inline u8 iwl_txq_gen1_tfd_get_num_tbs(struct iwl_trans *trans,
 {
        struct iwl_tfd *tfd;
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                struct iwl_tfh_tfd *tfh_tfd = _tfd;
 
                return le16_to_cpu(tfh_tfd->num_tbs) & 0x1f;
@@ -151,7 +151,7 @@ static inline u16 iwl_txq_gen1_tfd_tb_get_len(struct iwl_trans *trans,
        struct iwl_tfd *tfd;
        struct iwl_tfd_tb *tb;
 
-       if (trans->trans_cfg->use_tfh) {
+       if (trans->trans_cfg->gen2) {
                struct iwl_tfh_tfd *tfh_tfd = _tfd;
                struct iwl_tfh_tb *tfh_tb = &tfh_tfd->tbs[idx];
 
index 2b0f332..1f3bde8 100644 (file)
@@ -577,7 +577,7 @@ struct tx_msg {
     struct tib_structure tib;
     struct phy_header phy;
     struct mac_header mac;
-    UCHAR  var[1];
+    UCHAR  var[];
 };
 
 /****** ECF Receive Control Structure (RCS) Area at Shared RAM offset 0x0800  */
index 68e8822..ccedea7 100644 (file)
@@ -128,12 +128,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev)
        case MT_EE_5GHZ:
                dev->mphy.cap.has_5ghz = true;
                break;
-       case MT_EE_2GHZ:
-               dev->mphy.cap.has_2ghz = true;
-               break;
        case MT_EE_DBDC:
                dev->dbdc_support = true;
                fallthrough;
+       case MT_EE_2GHZ:
+               dev->mphy.cap.has_2ghz = true;
+               break;
        default:
                dev->mphy.cap.has_2ghz = true;
                dev->mphy.cap.has_5ghz = true;
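The switch is reordered so the DBDC case falls through into the 2 GHz case instead of into default: a DBDC EEPROM setting means this phy serves 2 GHz only, but the old fallthrough also advertised 5 GHz. Reduced to a runnable example of how the case order changes the result (enum values are placeholders):

#include <stdbool.h>
#include <stdio.h>

enum band_cap { EE_5GHZ, EE_2GHZ, EE_DBDC, EE_OTHER };

static void parse(enum band_cap cap, bool *has_2g, bool *has_5g, bool *dbdc)
{
	switch (cap) {
	case EE_5GHZ:
		*has_5g = true;
		break;
	case EE_DBDC:
		*dbdc = true;
		/* fallthrough: DBDC implies 2 GHz here, not 5 GHz */
	case EE_2GHZ:
		*has_2g = true;
		break;
	default:
		*has_2g = *has_5g = true;
		break;
	}
}

int main(void)
{
	bool g2 = false, g5 = false, dbdc = false;

	parse(EE_DBDC, &g2, &g5, &dbdc);
	printf("2g=%d 5g=%d dbdc=%d\n", g2, g5, dbdc);	/* 1 0 1 */
	return 0;
}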
index f0a80c2..4153cd6 100644 (file)
@@ -231,10 +231,6 @@ int mt7921_dma_init(struct mt7921_dev *dev)
        if (ret)
                return ret;
 
-       ret = mt7921_wfsys_reset(dev);
-       if (ret)
-               return ret;
-
        /* init tx queue */
        ret = mt76_connac_init_tx_queues(dev->phy.mt76, MT7921_TXQ_BAND0,
                                         MT7921_TX_RING_SIZE,
index c69ce6d..f55caa0 100644 (file)
@@ -476,12 +476,6 @@ static int mt7921_load_firmware(struct mt7921_dev *dev)
 {
        int ret;
 
-       ret = mt76_get_field(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_N9_RDY);
-       if (ret && mt76_is_mmio(&dev->mt76)) {
-               dev_dbg(dev->mt76.dev, "Firmware is already download\n");
-               goto fw_loaded;
-       }
-
        ret = mt76_connac2_load_patch(&dev->mt76, mt7921_patch_name(dev));
        if (ret)
                return ret;
@@ -504,8 +498,6 @@ static int mt7921_load_firmware(struct mt7921_dev *dev)
                return -EIO;
        }
 
-fw_loaded:
-
 #ifdef CONFIG_PM
        dev->mt76.hw->wiphy->wowlan = &mt76_connac_wowlan_support;
 #endif /* CONFIG_PM */
index ddb1fa4..95610a1 100644 (file)
@@ -325,6 +325,10 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
        bus_ops->rmw = mt7921_rmw;
        dev->mt76.bus = bus_ops;
 
+       ret = mt7921e_mcu_fw_pmctrl(dev);
+       if (ret)
+               goto err_free_dev;
+
        ret = __mt7921e_mcu_drv_pmctrl(dev);
        if (ret)
                goto err_free_dev;
@@ -333,6 +337,10 @@ static int mt7921_pci_probe(struct pci_dev *pdev,
                    (mt7921_l1_rr(dev, MT_HW_REV) & 0xff);
        dev_info(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
 
+       ret = mt7921_wfsys_reset(dev);
+       if (ret)
+               goto err_free_dev;
+
        mt76_wr(dev, MT_WFDMA0_HOST_INT_ENA, 0);
 
        mt76_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
index 1db2d59..a4bbac9 100644 (file)
@@ -3026,17 +3026,18 @@ static ssize_t rtw89_debug_priv_send_h2c_set(struct file *filp,
        struct rtw89_debugfs_priv *debugfs_priv = filp->private_data;
        struct rtw89_dev *rtwdev = debugfs_priv->rtwdev;
        u8 *h2c;
+       int ret;
        u16 h2c_len = count / 2;
 
        h2c = rtw89_hex2bin_user(rtwdev, user_buf, count);
        if (IS_ERR(h2c))
                return -EFAULT;
 
-       rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len);
+       ret = rtw89_fw_h2c_raw(rtwdev, h2c, h2c_len);
 
        kfree(h2c);
 
-       return count;
+       return ret ? ret : count;
 }
 
 static int
index b114bab..c93e625 100644 (file)
@@ -2524,7 +2524,7 @@ static int cmac_dma_init(struct rtw89_dev *rtwdev, u8 mac_idx)
        u32 reg;
        int ret;
 
-       if (chip_id != RTL8852A && chip_id != RTL8852B)
+       if (chip_id != RTL8852B)
                return 0;
 
        ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
index c8d20cd..88f760a 100644 (file)
@@ -396,7 +396,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
        struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
        struct xen_netif_tx_request *txp = first;
 
-       nr_slots = shinfo->nr_frags + 1;
+       nr_slots = shinfo->nr_frags + frag_overflow + 1;
 
        copy_count(skb) = 0;
        XENVIF_TX_CB(skb)->split_mask = 0;
@@ -462,8 +462,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                }
        }
 
-       for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
-            shinfo->nr_frags++, gop++) {
+       for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
+            shinfo->nr_frags++, gop++, nr_slots--) {
                index = pending_index(queue->pending_cons++);
                pending_idx = queue->pending_ring[index];
                xenvif_tx_create_map_op(queue, pending_idx, txp,
@@ -476,12 +476,12 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                        txp++;
        }
 
-       if (frag_overflow) {
+       if (nr_slots > 0) {
 
                shinfo = skb_shinfo(nskb);
                frags = shinfo->frags;
 
-               for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+               for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
                     shinfo->nr_frags++, txp++, gop++) {
                        index = pending_index(queue->pending_cons++);
                        pending_idx = queue->pending_ring[index];
@@ -492,6 +492,11 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
                }
 
                skb_shinfo(skb)->frag_list = nskb;
+       } else if (nskb) {
+               /* A frag_list skb was allocated but it is no longer needed
+                * because enough slots were converted to copy ops above.
+                */
+               kfree_skb(nskb);
        }
 
        (*copy_ops) = cop - queue->tx_copy_ops;
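nr_slots now counts the overflow frags as well, and the two loops consume it directly: the main skb takes at most MAX_SKB_FRAGS mapped slots, whatever remains spills into the preallocated frag_list skb, and if earlier copy-op conversion (not shown in these hunks) left nothing over, the spare skb is freed instead of being attached. A toy model of the split, assuming the common MAX_SKB_FRAGS value of 17:

#include <stdio.h>

#define MAX_SKB_FRAGS 17

int main(void)
{
	int nr_slots = 20;	/* e.g. nr_frags + frag_overflow + 1 */
	int main_frags = 0, list_frags = 0;

	while (nr_slots > 0 && main_frags < MAX_SKB_FRAGS) {
		main_frags++;		/* map op on the main skb */
		nr_slots--;
	}
	if (nr_slots > 0)
		list_frags = nr_slots;	/* spill into the frag_list skb */
	else
		puts("frag_list skb unused: kfree_skb(nskb)");

	printf("main=%d list=%d\n", main_frags, list_frags);	/* 17, 3 */
	return 0;
}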
index 47d7ba2..f3a01b7 100644 (file)
@@ -3431,10 +3431,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 
        ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
        if (ret) {
-               dev_err(ctrl->device,
-                       "globally duplicate IDs for nsid %d\n", info->nsid);
+               /*
+                * We've found two different namespaces on two different
+                * subsystems that report the same ID.  This is pretty nasty
+                * for anything that actually requires unique device
+                * identification.  In the kernel we need this for multipathing,
+                * and in user space the /dev/disk/by-id/ links rely on it.
+                *
+                * If the device also claims to be multi-path capable back off
+                * here now and refuse the probe the second device as this is a
+                * recipe for data corruption.  If not this is probably a
+                * cheap consumer device if on the PCIe bus, so let the user
+                * proceed and use the shiny toy, but warn that with changing
+                * probing order (which due to our async probing could just be
+                * device taking longer to startup) the other device could show
+                * up at any time.
+                */
                nvme_print_device_info(ctrl);
-               return ret;
+               if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
+                   ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
+                    info->is_shared)) {
+                       dev_err(ctrl->device,
+                               "ignoring nsid %d because of duplicate IDs\n",
+                               info->nsid);
+                       return ret;
+               }
+
+               dev_err(ctrl->device,
+                       "clearing duplicate IDs for nsid %d\n", info->nsid);
+               dev_err(ctrl->device,
+                       "use of /dev/disk/by-id/ may cause data corruption\n");
+               memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
+               memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
+               memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
+               ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
        }
 
        mutex_lock(&ctrl->subsys->lock);

@@ -3903,6 +3933,12 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         */
        nvme_mpath_clear_ctrl_paths(ctrl);
 
+       /*
+        * Unquiesce io queues so any pending IO won't hang, especially
+        * those submitted from scan work
+        */
+       nvme_unquiesce_io_queues(ctrl);
+
        /* prevent racing with ns scanning */
        flush_work(&ctrl->scan_work);
 
@@ -3912,10 +3948,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         * removing the namespaces' disks; fail all the queues now to avoid
         * potentially having to clean up the failed sync later.
         */
-       if (ctrl->state == NVME_CTRL_DEAD) {
+       if (ctrl->state == NVME_CTRL_DEAD)
                nvme_mark_namespaces_dead(ctrl);
-               nvme_unquiesce_io_queues(ctrl);
-       }
 
        /* this is a no-op when called from the controller reset handler */
        nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
index 83d2e68..1ba10a5 100644 (file)
@@ -27,7 +27,7 @@ void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
 
        /* create debugfs directory and attribute */
        parent = debugfs_create_dir(dev_name, NULL);
-       if (!parent) {
+       if (IS_ERR(parent)) {
                pr_warn("%s: failed to create debugfs directory\n", dev_name);
                return;
        }
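debugfs_create_dir() never returns NULL: on failure it returns an errno-encoded ERR_PTR(), so the old `if (!parent)` test could never fire and the error pointer would be passed on to the attribute creation below. The corrected pattern, as the hunk applies it:

	struct dentry *parent = debugfs_create_dir(dev_name, NULL);

	if (IS_ERR(parent)) {		/* NULL is never returned here */
		pr_warn("failed to create debugfs directory\n");
		return;
	}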
index 691f2df..1cd2bf8 100644 (file)
@@ -2548,14 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
         * the controller.  Abort any ios on the association and let the
         * create_association error path resolve things.
         */
-       if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
-               __nvme_fc_abort_outstanding_ios(ctrl, true);
+       enum nvme_ctrl_state state;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ctrl->lock, flags);
+       state = ctrl->ctrl.state;
+       if (state == NVME_CTRL_CONNECTING) {
                set_bit(ASSOC_FAILED, &ctrl->flags);
+               spin_unlock_irqrestore(&ctrl->lock, flags);
+               __nvme_fc_abort_outstanding_ios(ctrl, true);
+               dev_warn(ctrl->ctrl.device,
+                       "NVME-FC{%d}: transport error during (re)connect\n",
+                       ctrl->cnum);
                return;
        }
+       spin_unlock_irqrestore(&ctrl->lock, flags);
 
        /* Otherwise, only proceed if in LIVE state - e.g. on first error */
-       if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+       if (state != NVME_CTRL_LIVE)
                return;
 
        dev_warn(ctrl->ctrl.device,
@@ -3110,7 +3120,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
         */
 
        ret = nvme_enable_ctrl(&ctrl->ctrl);
-       if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+       if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+               ret = -EIO;
+       if (ret)
                goto out_disconnect_admin_queue;
 
        ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
@@ -3120,7 +3132,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
        nvme_unquiesce_admin_queue(&ctrl->ctrl);
 
        ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
-       if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+       if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+               ret = -EIO;
+       if (ret)
                goto out_disconnect_admin_queue;
 
        /* sanity checks */
@@ -3165,10 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
                else
                        ret = nvme_fc_recreate_io_queues(ctrl);
        }
-       if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
-               goto out_term_aen_ops;
 
+       spin_lock_irqsave(&ctrl->lock, flags);
+       if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+               ret = -EIO;
+       if (ret) {
+               spin_unlock_irqrestore(&ctrl->lock, flags);
+               goto out_term_aen_ops;
+       }
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+       spin_unlock_irqrestore(&ctrl->lock, flags);
 
        ctrl->ctrl.nr_reconnects = 0;
 
@@ -3180,6 +3200,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 out_term_aen_ops:
        nvme_fc_term_aen_ops(ctrl);
 out_disconnect_admin_queue:
+       dev_warn(ctrl->ctrl.device,
+               "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
+               ctrl->cnum, ctrl->association_id, ret);
        /* send a Disconnect(association) LS to fc-nvme target */
        nvme_fc_xmt_disconnect_assoc(ctrl);
        spin_lock_irqsave(&ctrl->lock, flags);
index 5c3250f..d39f321 100644 (file)
@@ -786,11 +786,9 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
        if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
                return 0;
 
-       rcu_read_lock();
        req = READ_ONCE(ioucmd->cookie);
        if (req && blk_rq_is_poll(req))
                ret = blk_rq_poll(req, iob, poll_flags);
-       rcu_read_unlock();
        return ret;
 }
 #ifdef CONFIG_NVME_MULTIPATH
index 7272572..2f57da1 100644 (file)
@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
                struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
                dma_unmap_page(dev->dev, iod->meta_dma,
-                              rq_integrity_vec(req)->bv_len, rq_data_dir(req));
+                              rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
        }
 
        if (blk_rq_nr_phys_segments(req))
@@ -1298,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
         */
        if (nvme_should_reset(dev, csts)) {
                nvme_warn_reset(dev, csts);
-               nvme_dev_disable(dev, false);
-               nvme_reset_ctrl(&dev->ctrl);
-               return BLK_EH_DONE;
+               goto disable;
        }
 
        /*
@@ -1351,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
                         "I/O %d QID %d timeout, reset controller\n",
                         req->tag, nvmeq->qid);
                nvme_req(req)->flags |= NVME_REQ_CANCELLED;
-               nvme_dev_disable(dev, false);
-               nvme_reset_ctrl(&dev->ctrl);
-
-               return BLK_EH_DONE;
+               goto disable;
        }
 
        if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1391,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
         * as the device then is in a faulty state.
         */
        return BLK_EH_RESET_TIMER;
+
+disable:
+       if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+               return BLK_EH_DONE;
+
+       nvme_dev_disable(dev, false);
+       if (nvme_try_sched_reset(&dev->ctrl))
+               nvme_unquiesce_io_queues(&dev->ctrl);
+       return BLK_EH_DONE;
 }
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -3278,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
        case pci_channel_io_frozen:
                dev_warn(dev->ctrl.device,
                        "frozen state error detected, reset controller\n");
+               if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
+                       nvme_dev_disable(dev, true);
+                       return PCI_ERS_RESULT_DISCONNECT;
+               }
                nvme_dev_disable(dev, false);
                return PCI_ERS_RESULT_NEED_RESET;
        case pci_channel_io_perm_failure:
@@ -3294,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
 
        dev_info(dev->ctrl.device, "restart after slot reset\n");
        pci_restore_state(pdev);
-       nvme_reset_ctrl(&dev->ctrl);
+       if (!nvme_try_sched_reset(&dev->ctrl))
+               nvme_unquiesce_io_queues(&dev->ctrl);
        return PCI_ERS_RESULT_RECOVERED;
 }
 
@@ -3393,9 +3402,12 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x144d, 0xa80b),   /* Samsung PM9B1 256G and 512G */
-               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+               .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES |
+                               NVME_QUIRK_BOGUS_NID, },
        { PCI_DEVICE(0x144d, 0xa809),   /* Samsung MZALQ256HBJD 256G */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+       { PCI_DEVICE(0x144d, 0xa802),   /* Samsung SM953 */
+               .driver_data = NVME_QUIRK_BOGUS_NID, },
        { PCI_DEVICE(0x1cc4, 0x6303),   /* UMIS RPJTJ512MGE1QDY 512G */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x1cc4, 0x6302),   /* UMIS RPJTJ256MGE1QDY 256G */
index d433b2e..337a624 100644 (file)
@@ -883,6 +883,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                goto out_cleanup_tagset;
 
        if (!new) {
+               nvme_start_freeze(&ctrl->ctrl);
                nvme_unquiesce_io_queues(&ctrl->ctrl);
                if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -891,6 +892,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(&ctrl->ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
@@ -940,7 +942,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
        if (ctrl->ctrl.queue_count > 1) {
-               nvme_start_freeze(&ctrl->ctrl);
                nvme_quiesce_io_queues(&ctrl->ctrl);
                nvme_sync_io_queues(&ctrl->ctrl);
                nvme_rdma_stop_io_queues(ctrl);
index 45e9181..212e1b0 100644 (file)
@@ -92,7 +92,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
         * we have no UUID set
         */
        if (uuid_is_null(&ids->uuid)) {
-               dev_warn_ratelimited(dev,
+               dev_warn_once(dev,
                        "No UUID available providing old NGUID\n");
                return sysfs_emit(buf, "%pU\n", ids->nguid);
        }
index 9ce417c..5b332d9 100644 (file)
@@ -1868,6 +1868,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
                goto out_cleanup_connect_q;
 
        if (!new) {
+               nvme_start_freeze(ctrl);
                nvme_unquiesce_io_queues(ctrl);
                if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
                        /*
@@ -1876,6 +1877,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
                         * to be safe.
                         */
                        ret = -ENODEV;
+                       nvme_unfreeze(ctrl);
                        goto out_wait_freeze_timed_out;
                }
                blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1980,7 +1982,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
        if (ctrl->queue_count <= 1)
                return;
        nvme_quiesce_admin_queue(ctrl);
-       nvme_start_freeze(ctrl);
        nvme_quiesce_io_queues(ctrl);
        nvme_sync_io_queues(ctrl);
        nvme_tcp_stop_io_queues(ctrl);
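This hunk and the RDMA hunks above make the same movement: nvme_start_freeze() leaves the teardown path and lands in the reconnect path, immediately before the freeze wait. Previously, if teardown froze the queues but the reconnect never got far enough to unfreeze them, the freeze depth stayed unbalanced and I/O hung forever; the new nvme_unfreeze() on the timeout branch keeps the pairing balanced on that error too. A schematic condensed from the hunks, with the success-side unfreeze (pre-existing, outside the visible context) noted as a comment:

	if (!new) {
		nvme_start_freeze(ctrl);	/* moved here from teardown */
		nvme_unquiesce_io_queues(ctrl);
		if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
			ret = -ENODEV;
			nvme_unfreeze(ctrl);	/* balance before bailing out */
			goto out_wait_freeze_timed_out;
		}
		blk_mq_update_nr_hw_queues(ctrl->tagset, ctrl->queue_count - 1);
		/* the pre-existing nvme_unfreeze() later on this path
		 * completes the pairing on success (not shown in the hunk) */
	}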
index 12316ab..ec85578 100644 (file)
 int nvme_revalidate_zones(struct nvme_ns *ns)
 {
        struct request_queue *q = ns->queue;
-       int ret;
 
-       ret = blk_revalidate_disk_zones(ns->disk, NULL);
-       if (!ret)
-               blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
-       return ret;
+       blk_queue_chunk_sectors(q, ns->zsze);
+       blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
+
+       return blk_revalidate_disk_zones(ns->disk, NULL);
 }
 
 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
index f2d24b2..48d5df0 100644 (file)
@@ -373,7 +373,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
                goto out_cleanup_tagset;
 
        ctrl->ctrl.max_hw_sectors =
-               (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
+               (NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT;
 
        nvme_unquiesce_admin_queue(&ctrl->ctrl);
 
index 71a9c1c..9fe07d7 100644 (file)
@@ -102,14 +102,14 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
         * which depends on the host's memory fragmentation. To solve this,
         * ensure mdts is limited to the pages equal to the number of segments.
         */
-       max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
+       max_hw_sectors = min_not_zero(pctrl->max_segments << PAGE_SECTORS_SHIFT,
                                      pctrl->max_hw_sectors);
 
        /*
         * nvmet_passthru_map_sg is limited to using a single bio so limit
         * the mdts based on BIO_MAX_VECS as well
         */
-       max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9),
+       max_hw_sectors = min_not_zero(BIO_MAX_VECS << PAGE_SECTORS_SHIFT,
                                      max_hw_sectors);
 
        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
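Both conversions in this file, like the loop.c hunk before it, replace the open-coded `(PAGE_SHIFT - 9)` with PAGE_SECTORS_SHIFT, the named constant for sectors-per-page as a shift: block-layer sectors are fixed 512-byte units, so a page of 2^PAGE_SHIFT bytes holds 2^(PAGE_SHIFT - 9) of them. A quick check of the arithmetic, assuming 4 KiB pages for the example:

#include <stdio.h>

#define SECTOR_SHIFT		9	/* 512-byte block-layer sectors */
#define PAGE_SHIFT_EXAMPLE	12	/* 4 KiB pages (arch-dependent) */
#define PAGE_SECTORS_SHIFT	(PAGE_SHIFT_EXAMPLE - SECTOR_SHIFT)

int main(void)
{
	unsigned int max_segments = 127;	/* one page per segment */

	/* pages -> 512-byte sectors, as in the mdts clamp above */
	printf("%u pages = %u sectors\n", max_segments,
	       max_segments << PAGE_SECTORS_SHIFT);	/* 1016 */
	return 0;
}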
index e40f10b..da9826a 100644 (file)
@@ -55,7 +55,7 @@ config OF_FLATTREE
 
 config OF_EARLY_FLATTREE
        bool
-       select DMA_DECLARE_COHERENT if HAS_DMA
+       select DMA_DECLARE_COHERENT if HAS_DMA && HAS_IOMEM
        select OF_FLATTREE
 
 config OF_PROMTREE
index e311d40..4999636 100644 (file)
@@ -63,15 +63,14 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
 
-#ifdef DEBUG
-const char *action_names[] = {
+static const char *action_names[] = {
+       [0] = "INVALID",
        [OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE",
        [OF_RECONFIG_DETACH_NODE] = "DETACH_NODE",
        [OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY",
        [OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY",
        [OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY",
 };
-#endif
 
 int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p)
 {
@@ -620,21 +619,9 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
                }
 
                ret = __of_add_property(ce->np, ce->prop);
-               if (ret) {
-                       pr_err("changeset: add_property failed @%pOF/%s\n",
-                               ce->np,
-                               ce->prop->name);
-                       break;
-               }
                break;
        case OF_RECONFIG_REMOVE_PROPERTY:
                ret = __of_remove_property(ce->np, ce->prop);
-               if (ret) {
-                       pr_err("changeset: remove_property failed @%pOF/%s\n",
-                               ce->np,
-                               ce->prop->name);
-                       break;
-               }
                break;
 
        case OF_RECONFIG_UPDATE_PROPERTY:
@@ -648,20 +635,17 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
                }
 
                ret = __of_update_property(ce->np, ce->prop, &old_prop);
-               if (ret) {
-                       pr_err("changeset: update_property failed @%pOF/%s\n",
-                               ce->np,
-                               ce->prop->name);
-                       break;
-               }
                break;
        default:
                ret = -EINVAL;
        }
        raw_spin_unlock_irqrestore(&devtree_lock, flags);
 
-       if (ret)
+       if (ret) {
+               pr_err("changeset: apply failed: %-15s %pOF:%s\n",
+                      action_names[ce->action], ce->np, ce->prop->name);
                return ret;
+       }
 
        switch (ce->action) {
        case OF_RECONFIG_ATTACH_NODE:
@@ -947,6 +931,9 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
        if (!ce)
                return -ENOMEM;
 
+       if (WARN_ON(action >= ARRAY_SIZE(action_names)))
+               return -EINVAL;
+
        /* get a reference to the node */
        ce->action = action;
        ce->np = of_node_get(np);
index f26d2ba..6827834 100644 (file)
@@ -184,7 +184,8 @@ int __init ima_free_kexec_buffer(void)
        if (ret)
                return ret;
 
-       return memblock_phys_free(addr, size);
+       memblock_free_late(addr, size);
+       return 0;
 }
 #endif
 
index 051e29b..6a557eb 100644 (file)
@@ -141,7 +141,7 @@ struct platform_device *of_device_alloc(struct device_node *np,
        }
 
        /* setup generic device info */
-       device_set_node(&dev->dev, of_fwnode_handle(np));
+       device_set_node(&dev->dev, of_fwnode_handle(of_node_get(np)));
        dev->dev.parent = parent ? : &platform_bus;
 
        if (bus_id)
@@ -239,7 +239,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
        dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
 
        /* setup generic device info */
-       device_set_node(&dev->dev, of_fwnode_handle(node));
+       device_set_node(&dev->dev, of_fwnode_handle(of_node_get(node)));
        dev->dev.parent = parent ? : &platform_bus;
        dev->dev.platform_data = platform_data;
        if (bus_id)
@@ -552,7 +552,7 @@ static int __init of_platform_default_populate_init(void)
                        if (!of_get_property(node, "linux,opened", NULL) ||
                            !of_get_property(node, "linux,boot-display", NULL))
                                continue;
-                       dev = of_platform_device_create(node, "of-display.0", NULL);
+                       dev = of_platform_device_create(node, "of-display", NULL);
                        of_node_put(node);
                        if (WARN_ON(!dev))
                                return -ENOMEM;
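
The added of_node_get() balances the of_node_put() performed when the
created device is released; device_set_node() itself takes no reference.
A sketch of the intended pairing, assuming a release callback that drops
dev.of_node as the platform and AMBA release paths do:

#include <linux/device.h>
#include <linux/of.h>

static void example_bind_node(struct device *dev, struct device_node *np)
{
        /* +1: the reference is now owned by the device */
        device_set_node(dev, of_fwnode_handle(of_node_get(np)));
}

static void example_release(struct device *dev)
{
        /* -1: drops the reference taken in example_bind_node() */
        of_node_put(dev->of_node);
}
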
index a406a12..b545fcb 100644 (file)
@@ -664,12 +664,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
        memset(&args, 0, sizeof(args));
 
        EXPECT_BEGIN(KERN_INFO,
-                    "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
+                    "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
 
        rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-phandle",
                                            "phandle", 0, &args);
        EXPECT_END(KERN_INFO,
-                  "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
+                  "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
 
        unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
 
index 3f46e49..919cc53 100644 (file)
@@ -177,25 +177,24 @@ unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp)
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_power);
 
 /**
- * dev_pm_opp_get_freq() - Gets the frequency corresponding to an available opp
- * @opp:       opp for which frequency has to be returned for
+ * dev_pm_opp_get_freq_indexed() - Gets the frequency corresponding to an
+ *                                available opp with the specified index
+ * @opp: opp for which the frequency has to be returned
+ * @index: index of the frequency within the required opp
  *
- * Return: frequency in hertz corresponding to the opp, else
- * return 0
+ * Return: frequency in hertz corresponding to the opp with the specified index,
+ * else return 0
  */
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index)
 {
-       if (IS_ERR_OR_NULL(opp)) {
+       if (IS_ERR_OR_NULL(opp) || index >= opp->opp_table->clk_count) {
                pr_err("%s: Invalid parameters\n", __func__);
                return 0;
        }
 
-       if (!assert_single_clk(opp->opp_table))
-               return 0;
-
-       return opp->rates[0];
+       return opp->rates[index];
 }
-EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq);
+EXPORT_SYMBOL_GPL(dev_pm_opp_get_freq_indexed);
 
 /**
  * dev_pm_opp_get_level() - Gets the level corresponding to an available opp
@@ -227,20 +226,18 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_level);
 unsigned int dev_pm_opp_get_required_pstate(struct dev_pm_opp *opp,
                                            unsigned int index)
 {
-       struct opp_table *opp_table = opp->opp_table;
-
        if (IS_ERR_OR_NULL(opp) || !opp->available ||
-           index >= opp_table->required_opp_count) {
+           index >= opp->opp_table->required_opp_count) {
                pr_err("%s: Invalid parameters\n", __func__);
                return 0;
        }
 
        /* required-opps not fully initialized yet */
-       if (lazy_linking_pending(opp_table))
+       if (lazy_linking_pending(opp->opp_table))
                return 0;
 
        /* The required OPP table must belong to a genpd */
-       if (unlikely(!opp_table->required_opp_tables[index]->is_genpd)) {
+       if (unlikely(!opp->opp_table->required_opp_tables[index]->is_genpd)) {
                pr_err("%s: Performance state is only valid for genpds.\n", __func__);
                return 0;
        }
@@ -450,7 +447,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
 /* Helpers to read keys */
 static unsigned long _read_freq(struct dev_pm_opp *opp, int index)
 {
-       return opp->rates[0];
+       return opp->rates[index];
 }
 
 static unsigned long _read_level(struct dev_pm_opp *opp, int index)
@@ -626,6 +623,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact);
 
+/**
+ * dev_pm_opp_find_freq_exact_indexed() - Search for an exact freq for the
+ *                                      clock corresponding to the index
+ * @dev:       Device for which we do this operation
+ * @freq:      frequency to search for
+ * @index:     Clock index
+ * @available: true/false - match for available opp
+ *
+ * Search for the matching exact OPP for the clock corresponding to the
+ * specified index from a starting freq for a device.
+ *
+ * Return: matching *opp, else returns ERR_PTR in case of error and should be
+ * handled using IS_ERR. Error return values can be:
+ * EINVAL:     for bad pointer
+ * ERANGE:     no match found for search
+ * ENODEV:     if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+                                  u32 index, bool available)
+{
+       return _find_key_exact(dev, freq, index, available, _read_freq, NULL);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_exact_indexed);
+
 static noinline struct dev_pm_opp *_find_freq_ceil(struct opp_table *opp_table,
                                                   unsigned long *freq)
 {
@@ -659,6 +684,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil);
 
 /**
+ * dev_pm_opp_find_freq_ceil_indexed() - Search for a rounded ceil freq for the
+ *                                      clock corresponding to the index
+ * @dev:       Device for which we do this operation
+ * @freq:      Start frequency
+ * @index:     Clock index
+ *
+ * Search for the matching ceil *available* OPP for the clock corresponding to
+ * the specified index from a starting freq for a device.
+ *
+ * Return: matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:     for bad pointer
+ * ERANGE:     no match found for search
+ * ENODEV:     if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *
+dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq,
+                                 u32 index)
+{
+       return _find_key_ceil(dev, freq, index, true, _read_freq, NULL);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_ceil_indexed);
+
+/**
  * dev_pm_opp_find_freq_floor() - Search for a rounded floor freq
  * @dev:       device for which we do this operation
  * @freq:      Start frequency
@@ -684,6 +737,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
 /**
+ * dev_pm_opp_find_freq_floor_indexed() - Search for a rounded floor freq for the
+ *                                       clock corresponding to the index
+ * @dev:       Device for which we do this operation
+ * @freq:      Start frequency
+ * @index:     Clock index
+ *
+ * Search for the matching floor *available* OPP for the clock corresponding to
+ * the specified index from a starting freq for a device.
+ *
+ * Return: matching *opp and refreshes *freq accordingly, else returns
+ * ERR_PTR in case of error and should be handled using IS_ERR. Error return
+ * values can be:
+ * EINVAL:     for bad pointer
+ * ERANGE:     no match found for search
+ * ENODEV:     if device not found in list of registered devices
+ *
+ * The callers are required to call dev_pm_opp_put() for the returned OPP after
+ * use.
+ */
+struct dev_pm_opp *
+dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq,
+                                  u32 index)
+{
+       return _find_key_floor(dev, freq, index, true, _read_freq, NULL);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor_indexed);
+
+/**
  * dev_pm_opp_find_level_exact() - search for an exact level
  * @dev:               device for which we do this operation
  * @level:             level to search for
@@ -2379,7 +2460,7 @@ static int _opp_attach_genpd(struct opp_table *opp_table, struct device *dev,
 
                virt_dev = dev_pm_domain_attach_by_name(dev, *name);
                if (IS_ERR_OR_NULL(virt_dev)) {
-                       ret = PTR_ERR(virt_dev) ? : -ENODEV;
+                       ret = virt_dev ? PTR_ERR(virt_dev) : -ENODEV;
                        dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret);
                        goto err;
                }
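
Taken together, the *_indexed() helpers extend the single-clock OPP lookups
to devices with several clocks per OPP. A hypothetical usage sketch (clock
index 1 is arbitrary, error handling condensed):

#include <linux/err.h>
#include <linux/pm_opp.h>

static int example_pick_rate(struct device *dev, unsigned long target_hz)
{
        struct dev_pm_opp *opp;
        unsigned long freq = target_hz;

        /* Round up to the nearest available OPP of clock index 1 */
        opp = dev_pm_opp_find_freq_ceil_indexed(dev, &freq, 1);
        if (IS_ERR(opp))
                return PTR_ERR(opp);

        /* Read back the rate for the same clock index */
        freq = dev_pm_opp_get_freq_indexed(opp, 1);

        /* The find helpers take a reference that the caller must drop */
        dev_pm_opp_put(opp);
        return 0;
}
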
index 3c35060..12c429b 100644 (file)
@@ -24,7 +24,7 @@
 /**
  * dev_pm_opp_init_cpufreq_table() - create a cpufreq table for a device
  * @dev:       device for which we do this operation
- * @table:     Cpufreq table returned back to caller
+ * @opp_table: Cpufreq table returned to the caller
  *
  * Generate a cpufreq table for a provided device- this assumes that the
  * opp table is already initialized and ready for usage.
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_init_cpufreq_table);
 /**
  * dev_pm_opp_free_cpufreq_table() - free the cpufreq table
  * @dev:       device for which we do this operation
- * @table:     table to free
+ * @opp_table: table to free
  *
  * Free up the table allocated by dev_pm_opp_init_cpufreq_table
  */
index bf3405f..8b1dcd5 100644 (file)
@@ -121,6 +121,8 @@ module_param(sba_reserve_agpgart, int, 0444);
 MODULE_PARM_DESC(sba_reserve_agpgart, "Reserve half of IO pdir as AGPGART");
 #endif
 
+struct proc_dir_entry *proc_runway_root __ro_after_init;
+struct proc_dir_entry *proc_mckinley_root __ro_after_init;
 
 /************************************
 ** SBA register read and write support
@@ -1968,11 +1970,15 @@ static int __init sba_driver_callback(struct parisc_device *dev)
 #ifdef CONFIG_PROC_FS
        switch (dev->id.hversion) {
        case PLUTO_MCKINLEY_PORT:
+               if (!proc_mckinley_root)
+                       proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
                root = proc_mckinley_root;
                break;
        case ASTRO_RUNWAY_PORT:
        case IKE_MERCED_PORT:
        default:
+               if (!proc_runway_root)
+                       proc_runway_root = proc_mkdir("bus/runway", NULL);
                root = proc_runway_root;
                break;
        }
index 0dcc497..5e44752 100644 (file)
@@ -28,7 +28,6 @@
 #include <linux/sysctl.h>
 
 #include <asm/io.h>
-#include <asm/dma.h>
 #include <linux/uaccess.h>
 #include <asm/superio.h>
 
@@ -226,9 +225,9 @@ static int parport_PS2_supported(struct parport *pb)
 
 /* --- Initialisation code -------------------------------- */
 
-struct parport *parport_gsc_probe_port(unsigned long base,
+static struct parport *parport_gsc_probe_port(unsigned long base,
                                       unsigned long base_hi, int irq,
-                                      int dma, struct parisc_device *padev)
+                                      struct parisc_device *padev)
 {
        struct parport_gsc_private *priv;
        struct parport_operations *ops;
@@ -250,12 +249,9 @@ struct parport *parport_gsc_probe_port(unsigned long base,
        }
        priv->ctr = 0xc;
        priv->ctr_writable = 0xff;
-       priv->dma_buf = NULL;
-       priv->dma_handle = 0;
        p->base = base;
        p->base_hi = base_hi;
        p->irq = irq;
-       p->dma = dma;
        p->modes = PARPORT_MODE_PCSPP | PARPORT_MODE_SAFEININT;
        p->ops = ops;
        p->private_data = priv;
@@ -286,17 +282,9 @@ struct parport *parport_gsc_probe_port(unsigned long base,
        if (p->irq == PARPORT_IRQ_AUTO) {
                p->irq = PARPORT_IRQ_NONE;
        }
-       if (p->irq != PARPORT_IRQ_NONE) {
+       if (p->irq != PARPORT_IRQ_NONE)
                pr_cont(", irq %d", p->irq);
 
-               if (p->dma == PARPORT_DMA_AUTO) {
-                       p->dma = PARPORT_DMA_NONE;
-               }
-       }
-       if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
-                                           is mandatory (see above) */
-               p->dma = PARPORT_DMA_NONE;
-
        pr_cont(" [");
 #define printmode(x)                                                   \
 do {                                                                   \
@@ -321,7 +309,6 @@ do {                                                                        \
                        pr_warn("%s: irq %d in use, resorting to polled operation\n",
                                p->name, p->irq);
                        p->irq = PARPORT_IRQ_NONE;
-                       p->dma = PARPORT_DMA_NONE;
                }
        }
 
@@ -369,8 +356,7 @@ static int __init parport_init_chip(struct parisc_device *dev)
                pr_info("%s: enhanced parport-modes not supported\n", __func__);
        }
        
-       p = parport_gsc_probe_port(port, 0, dev->irq,
-                       /* PARPORT_IRQ_NONE */ PARPORT_DMA_NONE, dev);
+       p = parport_gsc_probe_port(port, 0, dev->irq, dev);
        if (p)
                parport_count++;
        dev_set_drvdata(&dev->dev, p);
@@ -382,16 +368,10 @@ static void __exit parport_remove_chip(struct parisc_device *dev)
 {
        struct parport *p = dev_get_drvdata(&dev->dev);
        if (p) {
-               struct parport_gsc_private *priv = p->private_data;
                struct parport_operations *ops = p->ops;
                parport_remove_port(p);
-               if (p->dma != PARPORT_DMA_NONE)
-                       free_dma(p->dma);
                if (p->irq != PARPORT_IRQ_NONE)
                        free_irq(p->irq, p);
-               if (priv->dma_buf)
-                       dma_free_coherent(&priv->dev->dev, PAGE_SIZE,
-                                         priv->dma_buf, priv->dma_handle);
                kfree (p->private_data);
                parport_put_port(p);
                kfree (ops); /* hope no-one cached it */
index 9301217..d447a56 100644 (file)
@@ -63,8 +63,6 @@ struct parport_gsc_private {
        int writeIntrThreshold;
 
        /* buffer suitable for DMA, if DMA enabled */
-       char *dma_buf;
-       dma_addr_t dma_handle;
        struct pci_dev *dev;
 };
 
@@ -199,9 +197,4 @@ extern void parport_gsc_inc_use_count(void);
 
 extern void parport_gsc_dec_use_count(void);
 
-extern struct parport *parport_gsc_probe_port(unsigned long base,
-                                               unsigned long base_hi,
-                                               int irq, int dma,
-                                               struct parisc_device *padev);
-
 #endif /* __DRIVERS_PARPORT_PARPORT_GSC_H */
index 5bc81cc..46b252b 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/pci.h>
 #include <linux/errno.h>
 #include <linux/ioport.h>
+#include <linux/of.h>
 #include <linux/proc_fs.h>
 #include <linux/slab.h>
 
@@ -332,6 +333,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
  */
 void pci_bus_add_device(struct pci_dev *dev)
 {
+       struct device_node *dn = dev->dev.of_node;
        int retval;
 
        /*
@@ -344,7 +346,7 @@ void pci_bus_add_device(struct pci_dev *dev)
        pci_proc_attach_device(dev);
        pci_bridge_d3_update(dev);
 
-       dev->match_driver = true;
+       dev->match_driver = !dn || of_device_is_available(dn);
        retval = device_attach(&dev->dev);
        if (retval < 0 && retval != -EPROBE_DEFER)
                pci_warn(dev, "device attach failed (%d)\n", retval);
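
For reference, of_device_is_available() returns true when the node has no
"status" property or when it reads "okay"/"ok", so devices without a DT
node are unaffected by the new gate. The same test, spelled out as a
hypothetical helper:

#include <linux/of.h>
#include <linux/pci.h>

/*
 * A PCI device whose DT node carries status = "disabled" keeps
 * match_driver false: device_attach() binds no driver, but the
 * device stays enumerated for resource assignment.
 */
static bool example_should_match(struct pci_dev *pdev)
{
        struct device_node *dn = pdev->dev.of_node;

        return !dn || of_device_is_available(dn);
}
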
index 8d49bad..0859be8 100644 (file)
@@ -179,7 +179,6 @@ config PCI_MVEBU
        depends on MVEBU_MBUS
        depends on ARM
        depends on OF
-       depends on BROKEN
        select PCI_BRIDGE_EMUL
        help
         Add support for Marvell EBU PCIe controller. This PCIe controller
index cf61733..9952057 100644 (file)
@@ -485,20 +485,15 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
        if (ret)
                goto err_remove_edma;
 
-       if (dw_pcie_link_up(pci)) {
-               dw_pcie_print_link_status(pci);
-       } else {
+       if (!dw_pcie_link_up(pci)) {
                ret = dw_pcie_start_link(pci);
                if (ret)
                        goto err_remove_edma;
-
-               if (pci->ops && pci->ops->start_link) {
-                       ret = dw_pcie_wait_for_link(pci);
-                       if (ret)
-                               goto err_stop_link;
-               }
        }
 
+       /* Ignore errors, the link may come up later */
+       dw_pcie_wait_for_link(pci);
+
        bridge->sysdata = pp;
 
        ret = pci_host_probe(bridge);
index c87848c..1f2ee71 100644 (file)
@@ -644,20 +644,9 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index)
        dw_pcie_writel_atu(pci, dir, index, PCIE_ATU_REGION_CTRL2, 0);
 }
 
-void dw_pcie_print_link_status(struct dw_pcie *pci)
-{
-       u32 offset, val;
-
-       offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
-       val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
-
-       dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
-                FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
-                FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
-}
-
 int dw_pcie_wait_for_link(struct dw_pcie *pci)
 {
+       u32 offset, val;
        int retries;
 
        /* Check if the link is up or not */
@@ -673,7 +662,12 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
                return -ETIMEDOUT;
        }
 
-       dw_pcie_print_link_status(pci);
+       offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+       val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
+
+       dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
+                FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
+                FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
 
        return 0;
 }
index 6156606..79713ce 100644 (file)
@@ -429,7 +429,6 @@ void dw_pcie_setup(struct dw_pcie *pci);
 void dw_pcie_iatu_detect(struct dw_pcie *pci);
 int dw_pcie_edma_detect(struct dw_pcie *pci);
 void dw_pcie_edma_remove(struct dw_pcie *pci);
-void dw_pcie_print_link_status(struct dw_pcie *pci);
 
 static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
 {
index 328d1e4..6011297 100644 (file)
@@ -498,6 +498,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
                                acpiphp_native_scan_bridge(dev);
                }
        } else {
+               LIST_HEAD(add_list);
                int max, pass;
 
                acpiphp_rescan_slot(slot);
@@ -511,10 +512,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
                                if (pass && dev->subordinate) {
                                        check_hotplug_bridge(slot, dev);
                                        pcibios_resource_survey_bus(dev->subordinate);
+                                       if (pci_is_root_bus(bus))
+                                               __pci_bus_size_bridges(dev->subordinate, &add_list);
                                }
                        }
                }
-               pci_assign_unassigned_bridge_resources(bus->self);
+               if (pci_is_root_bus(bus))
+                       __pci_bus_assign_resources(bus, &add_list, NULL);
+               else
+                       pci_assign_unassigned_bridge_resources(bus->self);
        }
 
        acpiphp_sanitize_bus(bus);
index e51219f..3c158b1 100644 (file)
@@ -34,11 +34,6 @@ int pci_set_of_node(struct pci_dev *dev)
        if (!node)
                return 0;
 
-       if (!of_device_is_available(node)) {
-               of_node_put(node);
-               return -ENODEV;
-       }
-
        device_set_node(&dev->dev, of_fwnode_handle(node));
        return 0;
 }
index f4572a5..273d67e 100644 (file)
@@ -92,7 +92,7 @@ config ARM_PMU_ACPI
 
 config ARM_SMMU_V3_PMU
         tristate "ARM SMMUv3 Performance Monitors Extension"
-        depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT)
+        depends on ARM64 || (COMPILE_TEST && 64BIT)
         depends on GENERIC_MSI_IRQ
           help
           Provides support for the ARM SMMUv3 Performance Monitor Counter
index 5c5be9f..19d459a 100644 (file)
@@ -236,10 +236,37 @@ static const struct attribute_group ali_drw_pmu_cpumask_attr_group = {
        .attrs = ali_drw_pmu_cpumask_attrs,
 };
 
+static ssize_t ali_drw_pmu_identifier_show(struct device *dev,
+                                       struct device_attribute *attr,
+                                       char *page)
+{
+       return sysfs_emit(page, "%s\n", "ali_drw_pmu");
+}
+
+static umode_t ali_drw_pmu_identifier_attr_visible(struct kobject *kobj,
+                                               struct attribute *attr, int n)
+{
+       return attr->mode;
+}
+
+static struct device_attribute ali_drw_pmu_identifier_attr =
+       __ATTR(identifier, 0444, ali_drw_pmu_identifier_show, NULL);
+
+static struct attribute *ali_drw_pmu_identifier_attrs[] = {
+       &ali_drw_pmu_identifier_attr.attr,
+       NULL
+};
+
+static const struct attribute_group ali_drw_pmu_identifier_attr_group = {
+       .attrs = ali_drw_pmu_identifier_attrs,
+       .is_visible = ali_drw_pmu_identifier_attr_visible
+};
+
 static const struct attribute_group *ali_drw_pmu_attr_groups[] = {
        &ali_drw_pmu_events_attr_group,
        &ali_drw_pmu_cpumask_attr_group,
        &ali_drw_pmu_format_group,
+       &ali_drw_pmu_identifier_attr_group,
        NULL,
 };
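
From userspace, the new attribute appears alongside the PMU's other sysfs
files under /sys/bus/event_source/devices/. A hypothetical consumer; the
instance directory name depends on how the PMU registered itself:

#include <stdio.h>

int main(void)
{
        /* Hypothetical instance name under event_source */
        FILE *f = fopen("/sys/bus/event_source/devices/"
                        "ali_drw_21000/identifier", "r");
        char id[64];

        if (f && fgets(id, sizeof(id), f))
                printf("PMU identifier: %s", id);  /* "ali_drw_pmu" */
        if (f)
                fclose(f);
        return 0;
}
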
 
index 0b24dee..bbc7285 100644 (file)
@@ -9,8 +9,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
index 998259f..61de861 100644 (file)
@@ -7,10 +7,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
index b8c1587..913dc04 100644 (file)
@@ -72,6 +72,8 @@
 /* For most nodes, this is all there is */
 #define CMN_PMU_EVENT_SEL              0x000
 #define CMN__PMU_CBUSY_SNTHROTTLE_SEL  GENMASK_ULL(44, 42)
+#define CMN__PMU_SN_HOME_SEL           GENMASK_ULL(40, 39)
+#define CMN__PMU_HBT_LBT_SEL           GENMASK_ULL(38, 37)
 #define CMN__PMU_CLASS_OCCUP_ID                GENMASK_ULL(36, 35)
 /* Technically this is 4 bits wide on DNs, but we only use 2 there anyway */
 #define CMN__PMU_OCCUP1_ID             GENMASK_ULL(34, 32)
@@ -226,6 +228,7 @@ enum cmn_revision {
        REV_CMN700_R0P0 = 0,
        REV_CMN700_R1P0,
        REV_CMN700_R2P0,
+       REV_CMN700_R3P0,
        REV_CI700_R0P0 = 0,
        REV_CI700_R1P0,
        REV_CI700_R2P0,
@@ -254,6 +257,9 @@ enum cmn_node_type {
        CMN_TYPE_CCHA,
        CMN_TYPE_CCLA,
        CMN_TYPE_CCLA_RNI,
+       CMN_TYPE_HNS = 0x200,
+       CMN_TYPE_HNS_MPAM_S,
+       CMN_TYPE_HNS_MPAM_NS,
        /* Not a real node type */
        CMN_TYPE_WP = 0x7770
 };
@@ -263,6 +269,8 @@ enum cmn_filter_select {
        SEL_OCCUP1ID,
        SEL_CLASS_OCCUP_ID,
        SEL_CBUSY_SNTHROTTLE_SEL,
+       SEL_HBT_LBT_SEL,
+       SEL_SN_HOME_SEL,
        SEL_MAX
 };
 
@@ -742,8 +750,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
        _CMN_EVENT_ATTR(_model, dn_##_name, CMN_TYPE_DVM, _event, _occup, _fsel)
 #define CMN_EVENT_DTC(_name)                                   \
        CMN_EVENT_ATTR(CMN_ANY, dtc_##_name, CMN_TYPE_DTC, 0)
-#define _CMN_EVENT_HNF(_model, _name, _event, _occup, _fsel)           \
-       _CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event, _occup, _fsel)
+#define CMN_EVENT_HNF(_model, _name, _event)                   \
+       CMN_EVENT_ATTR(_model, hnf_##_name, CMN_TYPE_HNF, _event)
 #define CMN_EVENT_HNI(_name, _event)                           \
        CMN_EVENT_ATTR(CMN_ANY, hni_##_name, CMN_TYPE_HNI, _event)
 #define CMN_EVENT_HNP(_name, _event)                           \
@@ -768,6 +776,8 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
        CMN_EVENT_ATTR(CMN_ANY, ccla_##_name, CMN_TYPE_CCLA, _event)
 #define CMN_EVENT_CCLA_RNI(_name, _event)                              \
        CMN_EVENT_ATTR(CMN_ANY, ccla_rni_##_name, CMN_TYPE_CCLA_RNI, _event)
+#define CMN_EVENT_HNS(_name, _event)                           \
+       CMN_EVENT_ATTR(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
 
 #define CMN_EVENT_DVM(_model, _name, _event)                   \
        _CMN_EVENT_DVM(_model, _name, _event, 0, SEL_NONE)
@@ -775,32 +785,68 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
        _CMN_EVENT_DVM(_model, _name##_all, _event, 0, SEL_OCCUP1ID),   \
        _CMN_EVENT_DVM(_model, _name##_dvmop, _event, 1, SEL_OCCUP1ID), \
        _CMN_EVENT_DVM(_model, _name##_dvmsync, _event, 2, SEL_OCCUP1ID)
-#define CMN_EVENT_HNF(_model, _name, _event)                   \
-       _CMN_EVENT_HNF(_model, _name, _event, 0, SEL_NONE)
+
+#define CMN_EVENT_HN_OCC(_model, _name, _type, _event)         \
+       _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_OCCUP1ID), \
+       _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 1, SEL_OCCUP1ID), \
+       _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 2, SEL_OCCUP1ID), \
+       _CMN_EVENT_ATTR(_model, _name##_atomic, _type, _event, 3, SEL_OCCUP1ID), \
+       _CMN_EVENT_ATTR(_model, _name##_stash, _type, _event, 4, SEL_OCCUP1ID)
+#define CMN_EVENT_HN_CLS(_model, _name, _type, _event)                 \
+       _CMN_EVENT_ATTR(_model, _name##_class0, _type, _event, 0, SEL_CLASS_OCCUP_ID), \
+       _CMN_EVENT_ATTR(_model, _name##_class1, _type, _event, 1, SEL_CLASS_OCCUP_ID), \
+       _CMN_EVENT_ATTR(_model, _name##_class2, _type, _event, 2, SEL_CLASS_OCCUP_ID), \
+       _CMN_EVENT_ATTR(_model, _name##_class3, _type, _event, 3, SEL_CLASS_OCCUP_ID)
+#define CMN_EVENT_HN_SNT(_model, _name, _type, _event)                 \
+       _CMN_EVENT_ATTR(_model, _name##_all, _type, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_ATTR(_model, _name##_group0_read, _type, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_ATTR(_model, _name##_group0_write, _type, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_ATTR(_model, _name##_group1_read, _type, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_ATTR(_model, _name##_group1_write, _type, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_ATTR(_model, _name##_read, _type, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
+       _CMN_EVENT_ATTR(_model, _name##_write, _type, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
+
+#define CMN_EVENT_HNF_OCC(_model, _name, _event)                       \
+       CMN_EVENT_HN_OCC(_model, hnf_##_name, CMN_TYPE_HNF, _event)
 #define CMN_EVENT_HNF_CLS(_model, _name, _event)                       \
-       _CMN_EVENT_HNF(_model, _name##_class0, _event, 0, SEL_CLASS_OCCUP_ID), \
-       _CMN_EVENT_HNF(_model, _name##_class1, _event, 1, SEL_CLASS_OCCUP_ID), \
-       _CMN_EVENT_HNF(_model, _name##_class2, _event, 2, SEL_CLASS_OCCUP_ID), \
-       _CMN_EVENT_HNF(_model, _name##_class3, _event, 3, SEL_CLASS_OCCUP_ID)
+       CMN_EVENT_HN_CLS(_model, hnf_##_name, CMN_TYPE_HNF, _event)
 #define CMN_EVENT_HNF_SNT(_model, _name, _event)                       \
-       _CMN_EVENT_HNF(_model, _name##_all, _event, 0, SEL_CBUSY_SNTHROTTLE_SEL), \
-       _CMN_EVENT_HNF(_model, _name##_group0_read, _event, 1, SEL_CBUSY_SNTHROTTLE_SEL), \
-       _CMN_EVENT_HNF(_model, _name##_group0_write, _event, 2, SEL_CBUSY_SNTHROTTLE_SEL), \
-       _CMN_EVENT_HNF(_model, _name##_group1_read, _event, 3, SEL_CBUSY_SNTHROTTLE_SEL), \
-       _CMN_EVENT_HNF(_model, _name##_group1_write, _event, 4, SEL_CBUSY_SNTHROTTLE_SEL), \
-       _CMN_EVENT_HNF(_model, _name##_read, _event, 5, SEL_CBUSY_SNTHROTTLE_SEL), \
-       _CMN_EVENT_HNF(_model, _name##_write, _event, 6, SEL_CBUSY_SNTHROTTLE_SEL)
-
-#define _CMN_EVENT_XP(_name, _event)                           \
+       CMN_EVENT_HN_SNT(_model, hnf_##_name, CMN_TYPE_HNF, _event)
+
+#define CMN_EVENT_HNS_OCC(_name, _event)                               \
+       CMN_EVENT_HN_OCC(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event),   \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_rxsnp, CMN_TYPE_HNS, _event, 5, SEL_OCCUP1ID), \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 6, SEL_OCCUP1ID), \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 7, SEL_OCCUP1ID)
+#define CMN_EVENT_HNS_CLS(_name, _event)                               \
+       CMN_EVENT_HN_CLS(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_SNT(_name, _event)                               \
+       CMN_EVENT_HN_SNT(CMN_ANY, hns_##_name, CMN_TYPE_HNS, _event)
+#define CMN_EVENT_HNS_HBT(_name, _event)                               \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_HBT_LBT_SEL), \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_hbt, CMN_TYPE_HNS, _event, 1, SEL_HBT_LBT_SEL), \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_lbt, CMN_TYPE_HNS, _event, 2, SEL_HBT_LBT_SEL)
+#define CMN_EVENT_HNS_SNH(_name, _event)                               \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_all, CMN_TYPE_HNS, _event, 0, SEL_SN_HOME_SEL), \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_sn, CMN_TYPE_HNS, _event, 1, SEL_SN_HOME_SEL), \
+       _CMN_EVENT_ATTR(CMN_ANY, hns_##_name##_home, CMN_TYPE_HNS, _event, 2, SEL_SN_HOME_SEL)
+
+#define _CMN_EVENT_XP_MESH(_name, _event)                      \
        __CMN_EVENT_XP(e_##_name, (_event) | (0 << 2)),         \
        __CMN_EVENT_XP(w_##_name, (_event) | (1 << 2)),         \
        __CMN_EVENT_XP(n_##_name, (_event) | (2 << 2)),         \
-       __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2)),         \
+       __CMN_EVENT_XP(s_##_name, (_event) | (3 << 2))
+
+#define _CMN_EVENT_XP_PORT(_name, _event)                      \
        __CMN_EVENT_XP(p0_##_name, (_event) | (4 << 2)),        \
        __CMN_EVENT_XP(p1_##_name, (_event) | (5 << 2)),        \
        __CMN_EVENT_XP(p2_##_name, (_event) | (6 << 2)),        \
        __CMN_EVENT_XP(p3_##_name, (_event) | (7 << 2))
 
+#define _CMN_EVENT_XP(_name, _event)                           \
+       _CMN_EVENT_XP_MESH(_name, _event),                      \
+       _CMN_EVENT_XP_PORT(_name, _event)
+
 /* Good thing there are only 3 fundamental XP events... */
 #define CMN_EVENT_XP(_name, _event)                            \
        _CMN_EVENT_XP(req_##_name, (_event) | (0 << 5)),        \
@@ -813,6 +859,10 @@ static umode_t arm_cmn_event_attr_is_visible(struct kobject *kobj,
        _CMN_EVENT_XP(snp2_##_name, (_event) | (7 << 5)),       \
        _CMN_EVENT_XP(req2_##_name, (_event) | (8 << 5))
 
+#define CMN_EVENT_XP_DAT(_name, _event)                                \
+       _CMN_EVENT_XP_PORT(dat_##_name, (_event) | (3 << 5)),   \
+       _CMN_EVENT_XP_PORT(dat2_##_name, (_event) | (6 << 5))
+
 
 static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_DTC(cycles),
@@ -862,11 +912,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_HNF(CMN_ANY, mc_retries,              0x0c),
        CMN_EVENT_HNF(CMN_ANY, mc_reqs,                 0x0d),
        CMN_EVENT_HNF(CMN_ANY, qos_hh_retry,            0x0e),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_all, 0x0f, 0, SEL_OCCUP1ID),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_read, 0x0f, 1, SEL_OCCUP1ID),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_write, 0x0f, 2, SEL_OCCUP1ID),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_atomic, 0x0f, 3, SEL_OCCUP1ID),
-       _CMN_EVENT_HNF(CMN_ANY, qos_pocq_occupancy_stash, 0x0f, 4, SEL_OCCUP1ID),
+       CMN_EVENT_HNF_OCC(CMN_ANY, qos_pocq_occupancy,  0x0f),
        CMN_EVENT_HNF(CMN_ANY, pocq_addrhaz,            0x10),
        CMN_EVENT_HNF(CMN_ANY, pocq_atomic_addrhaz,     0x11),
        CMN_EVENT_HNF(CMN_ANY, ld_st_swp_adq_full,      0x12),
@@ -943,7 +989,7 @@ static struct attribute *arm_cmn_event_attrs[] = {
 
        CMN_EVENT_XP(txflit_valid,                      0x01),
        CMN_EVENT_XP(txflit_stall,                      0x02),
-       CMN_EVENT_XP(partial_dat_flit,                  0x03),
+       CMN_EVENT_XP_DAT(partial_dat_flit,              0x03),
        /* We treat watchpoints as a special made-up class of XP events */
        CMN_EVENT_ATTR(CMN_ANY, watchpoint_up, CMN_TYPE_WP, CMN_WP_UP),
        CMN_EVENT_ATTR(CMN_ANY, watchpoint_down, CMN_TYPE_WP, CMN_WP_DOWN),
@@ -1132,6 +1178,66 @@ static struct attribute *arm_cmn_event_attrs[] = {
        CMN_EVENT_CCLA(pfwd_sndr_stalls_static_crd,     0x2a),
        CMN_EVENT_CCLA(pfwd_sndr_stalls_dynmaic_crd,    0x2b),
 
+       CMN_EVENT_HNS_HBT(cache_miss,                   0x01),
+       CMN_EVENT_HNS_HBT(slc_sf_cache_access,          0x02),
+       CMN_EVENT_HNS_HBT(cache_fill,                   0x03),
+       CMN_EVENT_HNS_HBT(pocq_retry,                   0x04),
+       CMN_EVENT_HNS_HBT(pocq_reqs_recvd,              0x05),
+       CMN_EVENT_HNS_HBT(sf_hit,                       0x06),
+       CMN_EVENT_HNS_HBT(sf_evictions,                 0x07),
+       CMN_EVENT_HNS(dir_snoops_sent,                  0x08),
+       CMN_EVENT_HNS(brd_snoops_sent,                  0x09),
+       CMN_EVENT_HNS_HBT(slc_eviction,                 0x0a),
+       CMN_EVENT_HNS_HBT(slc_fill_invalid_way,         0x0b),
+       CMN_EVENT_HNS(mc_retries_local,                 0x0c),
+       CMN_EVENT_HNS_SNH(mc_reqs_local,                0x0d),
+       CMN_EVENT_HNS(qos_hh_retry,                     0x0e),
+       CMN_EVENT_HNS_OCC(qos_pocq_occupancy,           0x0f),
+       CMN_EVENT_HNS(pocq_addrhaz,                     0x10),
+       CMN_EVENT_HNS(pocq_atomic_addrhaz,              0x11),
+       CMN_EVENT_HNS(ld_st_swp_adq_full,               0x12),
+       CMN_EVENT_HNS(cmp_adq_full,                     0x13),
+       CMN_EVENT_HNS(txdat_stall,                      0x14),
+       CMN_EVENT_HNS(txrsp_stall,                      0x15),
+       CMN_EVENT_HNS(seq_full,                         0x16),
+       CMN_EVENT_HNS(seq_hit,                          0x17),
+       CMN_EVENT_HNS(snp_sent,                         0x18),
+       CMN_EVENT_HNS(sfbi_dir_snp_sent,                0x19),
+       CMN_EVENT_HNS(sfbi_brd_snp_sent,                0x1a),
+       CMN_EVENT_HNS(intv_dirty,                       0x1c),
+       CMN_EVENT_HNS(stash_snp_sent,                   0x1d),
+       CMN_EVENT_HNS(stash_data_pull,                  0x1e),
+       CMN_EVENT_HNS(snp_fwded,                        0x1f),
+       CMN_EVENT_HNS(atomic_fwd,                       0x20),
+       CMN_EVENT_HNS(mpam_hardlim,                     0x21),
+       CMN_EVENT_HNS(mpam_softlim,                     0x22),
+       CMN_EVENT_HNS(snp_sent_cluster,                 0x23),
+       CMN_EVENT_HNS(sf_imprecise_evict,               0x24),
+       CMN_EVENT_HNS(sf_evict_shared_line,             0x25),
+       CMN_EVENT_HNS_CLS(pocq_class_occup,             0x26),
+       CMN_EVENT_HNS_CLS(pocq_class_retry,             0x27),
+       CMN_EVENT_HNS_CLS(class_mc_reqs_local,          0x28),
+       CMN_EVENT_HNS_CLS(class_cgnt_cmin,              0x29),
+       CMN_EVENT_HNS_SNT(sn_throttle,                  0x2a),
+       CMN_EVENT_HNS_SNT(sn_throttle_min,              0x2b),
+       CMN_EVENT_HNS(sf_precise_to_imprecise,          0x2c),
+       CMN_EVENT_HNS(snp_intv_cln,                     0x2d),
+       CMN_EVENT_HNS(nc_excl,                          0x2e),
+       CMN_EVENT_HNS(excl_mon_ovfl,                    0x2f),
+       CMN_EVENT_HNS(snp_req_recvd,                    0x30),
+       CMN_EVENT_HNS(snp_req_byp_pocq,                 0x31),
+       CMN_EVENT_HNS(dir_ccgha_snp_sent,               0x32),
+       CMN_EVENT_HNS(brd_ccgha_snp_sent,               0x33),
+       CMN_EVENT_HNS(ccgha_snp_stall,                  0x34),
+       CMN_EVENT_HNS(lbt_req_hardlim,                  0x35),
+       CMN_EVENT_HNS(hbt_req_hardlim,                  0x36),
+       CMN_EVENT_HNS(sf_reupdate,                      0x37),
+       CMN_EVENT_HNS(excl_sf_imprecise,                0x38),
+       CMN_EVENT_HNS(snp_pocq_addrhaz,                 0x39),
+       CMN_EVENT_HNS(mc_retries_remote,                0x3a),
+       CMN_EVENT_HNS_SNH(mc_reqs_remote,               0x3b),
+       CMN_EVENT_HNS_CLS(class_mc_reqs_remote,         0x3c),
+
        NULL
 };
 
@@ -1373,6 +1479,10 @@ static int arm_cmn_set_event_sel_hi(struct arm_cmn_node *dn,
                dn->occupid[fsel].val = occupid;
                reg = FIELD_PREP(CMN__PMU_CBUSY_SNTHROTTLE_SEL,
                                 dn->occupid[SEL_CBUSY_SNTHROTTLE_SEL].val) |
+                     FIELD_PREP(CMN__PMU_SN_HOME_SEL,
+                                dn->occupid[SEL_SN_HOME_SEL].val) |
+                     FIELD_PREP(CMN__PMU_HBT_LBT_SEL,
+                                dn->occupid[SEL_HBT_LBT_SEL].val) |
                      FIELD_PREP(CMN__PMU_CLASS_OCCUP_ID,
                                 dn->occupid[SEL_CLASS_OCCUP_ID].val) |
                      FIELD_PREP(CMN__PMU_OCCUP1_ID,
@@ -2200,6 +2310,7 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                        case CMN_TYPE_CCRA:
                        case CMN_TYPE_CCHA:
                        case CMN_TYPE_CCLA:
+                       case CMN_TYPE_HNS:
                                dn++;
                                break;
                        /* Nothing to see here */
@@ -2207,6 +2318,8 @@ static int arm_cmn_discover(struct arm_cmn *cmn, unsigned int rgn_offset)
                        case CMN_TYPE_MPAM_NS:
                        case CMN_TYPE_RNSAM:
                        case CMN_TYPE_CXLA:
+                       case CMN_TYPE_HNS_MPAM_S:
+                       case CMN_TYPE_HNS_MPAM_NS:
                                break;
                        /*
                         * Split "optimised" combination nodes into separate
index 9d0f01c..30cea68 100644 (file)
 #define DMC620_PMU_COUNTERn_OFFSET(n) \
        (DMC620_PMU_COUNTERS_BASE + 0x28 * (n))
 
-static LIST_HEAD(dmc620_pmu_irqs);
+/*
+ * dmc620_pmu_irqs_lock: protects dmc620_pmu_irqs list
+ * dmc620_pmu_node_lock: protects pmus_node lists in all dmc620_pmu instances
+ */
 static DEFINE_MUTEX(dmc620_pmu_irqs_lock);
+static DEFINE_MUTEX(dmc620_pmu_node_lock);
+static LIST_HEAD(dmc620_pmu_irqs);
 
 struct dmc620_pmu_irq {
        struct hlist_node node;
@@ -475,9 +480,9 @@ static int dmc620_pmu_get_irq(struct dmc620_pmu *dmc620_pmu, int irq_num)
                return PTR_ERR(irq);
 
        dmc620_pmu->irq = irq;
-       mutex_lock(&dmc620_pmu_irqs_lock);
+       mutex_lock(&dmc620_pmu_node_lock);
        list_add_rcu(&dmc620_pmu->pmus_node, &irq->pmus_node);
-       mutex_unlock(&dmc620_pmu_irqs_lock);
+       mutex_unlock(&dmc620_pmu_node_lock);
 
        return 0;
 }
@@ -486,9 +491,11 @@ static void dmc620_pmu_put_irq(struct dmc620_pmu *dmc620_pmu)
 {
        struct dmc620_pmu_irq *irq = dmc620_pmu->irq;
 
-       mutex_lock(&dmc620_pmu_irqs_lock);
+       mutex_lock(&dmc620_pmu_node_lock);
        list_del_rcu(&dmc620_pmu->pmus_node);
+       mutex_unlock(&dmc620_pmu_node_lock);
 
+       mutex_lock(&dmc620_pmu_irqs_lock);
        if (!refcount_dec_and_test(&irq->refcount)) {
                mutex_unlock(&dmc620_pmu_irqs_lock);
                return;
@@ -638,10 +645,10 @@ static int dmc620_pmu_cpu_teardown(unsigned int cpu,
                return 0;
 
        /* We're only reading, but this isn't the place to be involving RCU */
-       mutex_lock(&dmc620_pmu_irqs_lock);
+       mutex_lock(&dmc620_pmu_node_lock);
        list_for_each_entry(dmc620_pmu, &irq->pmus_node, pmus_node)
                perf_pmu_migrate_context(&dmc620_pmu->pmu, irq->cpu, target);
-       mutex_unlock(&dmc620_pmu_irqs_lock);
+       mutex_unlock(&dmc620_pmu_node_lock);
 
        WARN_ON(irq_set_affinity(irq->irq_num, cpumask_of(target)));
        irq->cpu = target;
index fe2abb4..8223c49 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
index f6ccb2c..d712a19 100644 (file)
@@ -877,11 +877,13 @@ struct arm_pmu *armpmu_alloc(void)
                .attr_groups    = pmu->attr_groups,
                /*
                 * This is a CPU PMU potentially in a heterogeneous
-                * configuration (e.g. big.LITTLE). This is not an uncore PMU,
-                * and we have taken ctx sharing into account (e.g. with our
-                * pmu::filter callback and pmu::event_init group validation).
+                * configuration (e.g. big.LITTLE), so
+                * PERF_PMU_CAP_EXTENDED_HW_TYPE is required to open
+                * PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE events on a
+                * specific PMU.
                 */
-               .capabilities   = PERF_PMU_CAP_HETEROGENEOUS_CPUS | PERF_PMU_CAP_EXTENDED_REGS,
+               .capabilities   = PERF_PMU_CAP_EXTENDED_REGS |
+                                 PERF_PMU_CAP_EXTENDED_HW_TYPE,
        };
 
        pmu->attr_groups[ARMPMU_ATTR_GROUP_COMMON] =
index 90815ad..05dda19 100644 (file)
@@ -69,6 +69,62 @@ static void arm_pmu_acpi_unregister_irq(int cpu)
                acpi_unregister_gsi(gsi);
 }
 
+static int __maybe_unused
+arm_acpi_register_pmu_device(struct platform_device *pdev, u8 len,
+                            u16 (*parse_gsi)(struct acpi_madt_generic_interrupt *))
+{
+       int cpu, this_hetid, hetid, irq, ret;
+       u16 this_gsi = 0, gsi = 0;
+
+       /*
+        * Ensure that the platform device has an IORESOURCE_IRQ
+        * resource to hold the GSI interrupt.
+        */
+       if (pdev->num_resources != 1)
+               return -ENXIO;
+
+       if (pdev->resource[0].flags != IORESOURCE_IRQ)
+               return -ENXIO;
+
+       /*
+        * Sanity check all the GICC tables for the same interrupt
+        * number. For now, only support homogeneous ACPI machines.
+        */
+       for_each_possible_cpu(cpu) {
+               struct acpi_madt_generic_interrupt *gicc;
+
+               gicc = acpi_cpu_get_madt_gicc(cpu);
+               if (gicc->header.length < len)
+                       return gsi ? -ENXIO : 0;
+
+               this_gsi = parse_gsi(gicc);
+               this_hetid = find_acpi_cpu_topology_hetero_id(cpu);
+               if (!gsi) {
+                       hetid = this_hetid;
+                       gsi = this_gsi;
+               } else if (hetid != this_hetid || gsi != this_gsi) {
+                       pr_warn("ACPI: %s: must be homogeneous\n", pdev->name);
+                       return -ENXIO;
+               }
+       }
+
+       if (!this_gsi)
+               return 0;
+
+       irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH);
+       if (irq < 0) {
+               pr_warn("ACPI: %s Unable to register interrupt: %d\n", pdev->name, gsi);
+               return -ENXIO;
+       }
+
+       pdev->resource[0].start = irq;
+       ret = platform_device_register(pdev);
+       if (ret)
+               acpi_unregister_gsi(gsi);
+
+       return ret;
+}
+
 #if IS_ENABLED(CONFIG_ARM_SPE_PMU)
 static struct resource spe_resources[] = {
        {
@@ -84,6 +140,11 @@ static struct platform_device spe_dev = {
        .num_resources = ARRAY_SIZE(spe_resources)
 };
 
+static u16 arm_spe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+       return gicc->spe_interrupt;
+}
+
 /*
  * For lack of a better place, hook the normal PMU MADT walk
  * and create a SPE device if we detect a recent MADT with
@@ -91,53 +152,50 @@ static struct platform_device spe_dev = {
  */
 static void arm_spe_acpi_register_device(void)
 {
-       int cpu, hetid, irq, ret;
-       bool first = true;
-       u16 gsi = 0;
-
-       /*
-        * Sanity check all the GICC tables for the same interrupt number.
-        * For now, we only support homogeneous ACPI/SPE machines.
-        */
-       for_each_possible_cpu(cpu) {
-               struct acpi_madt_generic_interrupt *gicc;
+       int ret = arm_acpi_register_pmu_device(&spe_dev, ACPI_MADT_GICC_SPE,
+                                              arm_spe_parse_gsi);
+       if (ret)
+               pr_warn("ACPI: SPE: Unable to register device\n");
+}
+#else
+static inline void arm_spe_acpi_register_device(void)
+{
+}
+#endif /* CONFIG_ARM_SPE_PMU */
 
-               gicc = acpi_cpu_get_madt_gicc(cpu);
-               if (gicc->header.length < ACPI_MADT_GICC_SPE)
-                       return;
-
-               if (first) {
-                       gsi = gicc->spe_interrupt;
-                       if (!gsi)
-                               return;
-                       hetid = find_acpi_cpu_topology_hetero_id(cpu);
-                       first = false;
-               } else if ((gsi != gicc->spe_interrupt) ||
-                          (hetid != find_acpi_cpu_topology_hetero_id(cpu))) {
-                       pr_warn("ACPI: SPE must be homogeneous\n");
-                       return;
-               }
+#if IS_ENABLED(CONFIG_CORESIGHT_TRBE)
+static struct resource trbe_resources[] = {
+       {
+               /* irq */
+               .flags          = IORESOURCE_IRQ,
        }
+};
 
-       irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE,
-                               ACPI_ACTIVE_HIGH);
-       if (irq < 0) {
-               pr_warn("ACPI: SPE Unable to register interrupt: %d\n", gsi);
-               return;
-       }
+static struct platform_device trbe_dev = {
+       .name = ARMV8_TRBE_PDEV_NAME,
+       .id = -1,
+       .resource = trbe_resources,
+       .num_resources = ARRAY_SIZE(trbe_resources)
+};
 
-       spe_resources[0].start = irq;
-       ret = platform_device_register(&spe_dev);
-       if (ret < 0) {
-               pr_warn("ACPI: SPE: Unable to register device\n");
-               acpi_unregister_gsi(gsi);
-       }
+static u16 arm_trbe_parse_gsi(struct acpi_madt_generic_interrupt *gicc)
+{
+       return gicc->trbe_interrupt;
+}
+
+static void arm_trbe_acpi_register_device(void)
+{
+       int ret = arm_acpi_register_pmu_device(&trbe_dev, ACPI_MADT_GICC_TRBE,
+                                              arm_trbe_parse_gsi);
+       if (ret)
+               pr_warn("ACPI: TRBE: Unable to register device\n");
 }
 #else
-static inline void arm_spe_acpi_register_device(void)
+static inline void arm_trbe_acpi_register_device(void)
 {
 }
-#endif /* CONFIG_ARM_SPE_PMU */
+#endif /* CONFIG_CORESIGHT_TRBE */
 
 static int arm_pmu_acpi_parse_irqs(void)
 {
@@ -374,6 +432,7 @@ static int arm_pmu_acpi_init(void)
                return 0;
 
        arm_spe_acpi_register_device();
+       arm_trbe_acpi_register_device();
 
        return 0;
 }
index 933b96e..3596db3 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/irqdesc.h>
 #include <linux/kconfig.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/percpu.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
index 08b3a1b..e5a2ac4 100644 (file)
@@ -721,38 +721,15 @@ static void armv8pmu_enable_event(struct perf_event *event)
         * Enable counter and interrupt, and set the counter to count
         * the event that we're interested in.
         */
-
-       /*
-        * Disable counter
-        */
        armv8pmu_disable_event_counter(event);
-
-       /*
-        * Set event.
-        */
        armv8pmu_write_event_type(event);
-
-       /*
-        * Enable interrupt for this counter
-        */
        armv8pmu_enable_event_irq(event);
-
-       /*
-        * Enable counter
-        */
        armv8pmu_enable_event_counter(event);
 }
 
 static void armv8pmu_disable_event(struct perf_event *event)
 {
-       /*
-        * Disable counter
-        */
        armv8pmu_disable_event_counter(event);
-
-       /*
-        * Disable interrupt for this counter
-        */
        armv8pmu_disable_event_irq(event);
 }
 
@@ -1266,9 +1243,14 @@ PMUV3_INIT_SIMPLE(armv8_cortex_a76)
 PMUV3_INIT_SIMPLE(armv8_cortex_a77)
 PMUV3_INIT_SIMPLE(armv8_cortex_a78)
 PMUV3_INIT_SIMPLE(armv9_cortex_a510)
+PMUV3_INIT_SIMPLE(armv9_cortex_a520)
 PMUV3_INIT_SIMPLE(armv9_cortex_a710)
+PMUV3_INIT_SIMPLE(armv9_cortex_a715)
+PMUV3_INIT_SIMPLE(armv9_cortex_a720)
 PMUV3_INIT_SIMPLE(armv8_cortex_x1)
 PMUV3_INIT_SIMPLE(armv9_cortex_x2)
+PMUV3_INIT_SIMPLE(armv9_cortex_x3)
+PMUV3_INIT_SIMPLE(armv9_cortex_x4)
 PMUV3_INIT_SIMPLE(armv8_neoverse_e1)
 PMUV3_INIT_SIMPLE(armv8_neoverse_n1)
 PMUV3_INIT_SIMPLE(armv9_neoverse_n2)
@@ -1334,9 +1316,14 @@ static const struct of_device_id armv8_pmu_of_device_ids[] = {
        {.compatible = "arm,cortex-a77-pmu",    .data = armv8_cortex_a77_pmu_init},
        {.compatible = "arm,cortex-a78-pmu",    .data = armv8_cortex_a78_pmu_init},
        {.compatible = "arm,cortex-a510-pmu",   .data = armv9_cortex_a510_pmu_init},
+       {.compatible = "arm,cortex-a520-pmu",   .data = armv9_cortex_a520_pmu_init},
        {.compatible = "arm,cortex-a710-pmu",   .data = armv9_cortex_a710_pmu_init},
+       {.compatible = "arm,cortex-a715-pmu",   .data = armv9_cortex_a715_pmu_init},
+       {.compatible = "arm,cortex-a720-pmu",   .data = armv9_cortex_a720_pmu_init},
        {.compatible = "arm,cortex-x1-pmu",     .data = armv8_cortex_x1_pmu_init},
        {.compatible = "arm,cortex-x2-pmu",     .data = armv9_cortex_x2_pmu_init},
+       {.compatible = "arm,cortex-x3-pmu",     .data = armv9_cortex_x3_pmu_init},
+       {.compatible = "arm,cortex-x4-pmu",     .data = armv9_cortex_x4_pmu_init},
        {.compatible = "arm,neoverse-e1-pmu",   .data = armv8_neoverse_e1_pmu_init},
        {.compatible = "arm,neoverse-n1-pmu",   .data = armv8_neoverse_n1_pmu_init},
        {.compatible = "arm,neoverse-n2-pmu",   .data = armv9_neoverse_n2_pmu_init},
index 25a269d..6303b82 100644 (file)
 #define SMMU_PMCG_PA_SHIFT              12
 
 #define SMMU_PMCG_EVCNTR_RDONLY         BIT(0)
+#define SMMU_PMCG_HARDEN_DISABLE        BIT(1)
 
 static int cpuhp_state_num;
 
@@ -159,6 +160,20 @@ static inline void smmu_pmu_enable(struct pmu *pmu)
        writel(SMMU_PMCG_CR_ENABLE, smmu_pmu->reg_base + SMMU_PMCG_CR);
 }
 
+static int smmu_pmu_apply_event_filter(struct smmu_pmu *smmu_pmu,
+                                      struct perf_event *event, int idx);
+
+static inline void smmu_pmu_enable_quirk_hip08_09(struct pmu *pmu)
+{
+       struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+       unsigned int idx;
+
+       for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+               smmu_pmu_apply_event_filter(smmu_pmu, smmu_pmu->events[idx], idx);
+
+       smmu_pmu_enable(pmu);
+}
+
 static inline void smmu_pmu_disable(struct pmu *pmu)
 {
        struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
@@ -167,6 +182,22 @@ static inline void smmu_pmu_disable(struct pmu *pmu)
        writel(0, smmu_pmu->reg_base + SMMU_PMCG_IRQ_CTRL);
 }
 
+static inline void smmu_pmu_disable_quirk_hip08_09(struct pmu *pmu)
+{
+       struct smmu_pmu *smmu_pmu = to_smmu_pmu(pmu);
+       unsigned int idx;
+
+       /*
+        * The global disable of the PMU sometimes fails to stop the counting.
+        * Harden this by writing an invalid event type to each used counter
+        * to forcibly stop counting.
+        */
+       for_each_set_bit(idx, smmu_pmu->used_counters, smmu_pmu->num_counters)
+               writel(0xffff, smmu_pmu->reg_base + SMMU_PMCG_EVTYPER(idx));
+
+       smmu_pmu_disable(pmu);
+}
+
 static inline void smmu_pmu_counter_set_value(struct smmu_pmu *smmu_pmu,
                                              u32 idx, u64 value)
 {
@@ -765,7 +796,10 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu)
        switch (model) {
        case IORT_SMMU_V3_PMCG_HISI_HIP08:
                /* HiSilicon Erratum 162001800 */
-               smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY;
+               smmu_pmu->options |= SMMU_PMCG_EVCNTR_RDONLY | SMMU_PMCG_HARDEN_DISABLE;
+               break;
+       case IORT_SMMU_V3_PMCG_HISI_HIP09:
+               smmu_pmu->options |= SMMU_PMCG_HARDEN_DISABLE;
                break;
        }
 
@@ -890,6 +924,16 @@ static int smmu_pmu_probe(struct platform_device *pdev)
        if (!dev->of_node)
                smmu_pmu_get_acpi_options(smmu_pmu);
 
+       /*
+        * For platforms that suffer from this quirk, the PMU disable sometimes
+        * fails to stop the counters, which leads to inaccurate or erroneous
+        * counting. Forcibly disable the counters with these quirk handlers.
+        */
+       if (smmu_pmu->options & SMMU_PMCG_HARDEN_DISABLE) {
+               smmu_pmu->pmu.pmu_enable = smmu_pmu_enable_quirk_hip08_09;
+               smmu_pmu->pmu.pmu_disable = smmu_pmu_disable_quirk_hip08_09;
+       }
+
        /* Pick one CPU to be the preferred one to use */
        smmu_pmu->on_cpu = raw_smp_processor_id();
        WARN_ON(irq_set_affinity(smmu_pmu->irq, cpumask_of(smmu_pmu->on_cpu)));
@@ -984,6 +1028,7 @@ static void __exit arm_smmu_pmu_exit(void)
 
 module_exit(arm_smmu_pmu_exit);
 
+MODULE_ALIAS("platform:arm-smmu-v3-pmcg");
 MODULE_DESCRIPTION("PMU driver for ARM SMMUv3 Performance Monitors Extension");
 MODULE_AUTHOR("Neil Leeder <nleeder@codeaurora.org>");
 MODULE_AUTHOR("Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>");
index b9ba4c4..d2b0cbf 100644 (file)
@@ -25,8 +25,7 @@
 #include <linux/kernel.h>
 #include <linux/list.h>
 #include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/perf_event.h>
 #include <linux/perf/arm_pmu.h>
 #include <linux/platform_device.h>
index 5222ba1..92611c9 100644 (file)
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/perf_event.h>
+#include <linux/platform_device.h>
 #include <linux/slab.h>
 
 #define COUNTER_CNTL           0x0
@@ -28,6 +27,8 @@
 #define CNTL_CLEAR_MASK                0xFFFFFFFD
 #define CNTL_OVER_MASK         0xFFFFFFFE
 
+#define CNTL_CP_SHIFT          16
+#define CNTL_CP_MASK           (0xFF << CNTL_CP_SHIFT)
 #define CNTL_CSV_SHIFT         24
 #define CNTL_CSV_MASK          (0xFFU << CNTL_CSV_SHIFT)
 
@@ -35,6 +36,8 @@
 #define EVENT_CYCLES_COUNTER   0
 #define NUM_COUNTERS           4
 
+/* For removing bias if cycle counter CNTL.CP is set to 0xf0 */
+#define CYCLES_COUNTER_MASK    0x0FFFFFFF
 #define AXI_MASKING_REVERT     0xffff0000      /* AXI_MASKING(MSB 16bits) + AXI_ID(LSB 16bits) */
 
 #define to_ddr_pmu(p)          container_of(p, struct ddr_pmu, pmu)
@@ -101,6 +104,7 @@ struct ddr_pmu {
        const struct fsl_ddr_devtype_data *devtype_data;
        int irq;
        int id;
+       int active_counter;
 };
 
 static ssize_t ddr_perf_identifier_show(struct device *dev,
@@ -427,6 +431,17 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
                writel(0, pmu->base + reg);
                val = CNTL_EN | CNTL_CLEAR;
                val |= FIELD_PREP(CNTL_CSV_MASK, config);
+
+               /*
+                * On i.MX8MP we need to bias the cycle counter to overflow more often.
+                * We do this by setting the Counter Parameter (CP) field, bits
+                * [23:16] of COUNTER_CTRL, which preloads the upper byte of the
+                * counter value.
+                */
+               if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+                       if (counter == EVENT_CYCLES_COUNTER)
+                               val |= FIELD_PREP(CNTL_CP_MASK, 0xf0);
+               }
+
                writel(val, pmu->base + reg);
        } else {
                /* Disable counter */
@@ -466,6 +481,12 @@ static void ddr_perf_event_update(struct perf_event *event)
        int ret;
 
        new_raw_count = ddr_perf_read_counter(pmu, counter);
+       /* Remove the bias applied in ddr_perf_counter_enable(). */
+       if (pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED) {
+               if (counter == EVENT_CYCLES_COUNTER)
+                       new_raw_count &= CYCLES_COUNTER_MASK;
+       }
+
        local64_add(new_raw_count, &event->count);
 
        /*
@@ -495,6 +516,10 @@ static void ddr_perf_event_start(struct perf_event *event, int flags)
 
        ddr_perf_counter_enable(pmu, event->attr.config, counter, true);
 
+       if (!pmu->active_counter++)
+               ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+                       EVENT_CYCLES_COUNTER, true);
+
        hwc->state = 0;
 }
 
@@ -548,6 +573,10 @@ static void ddr_perf_event_stop(struct perf_event *event, int flags)
        ddr_perf_counter_enable(pmu, event->attr.config, counter, false);
        ddr_perf_event_update(event);
 
+       if (!--pmu->active_counter)
+               ddr_perf_counter_enable(pmu, EVENT_CYCLES_ID,
+                       EVENT_CYCLES_COUNTER, false);
+
        hwc->state |= PERF_HES_STOPPED;
 }
 
@@ -565,25 +594,10 @@ static void ddr_perf_event_del(struct perf_event *event, int flags)
 
 static void ddr_perf_pmu_enable(struct pmu *pmu)
 {
-       struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
-       /* enable cycle counter if cycle is not active event list */
-       if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
-               ddr_perf_counter_enable(ddr_pmu,
-                                     EVENT_CYCLES_ID,
-                                     EVENT_CYCLES_COUNTER,
-                                     true);
 }
 
 static void ddr_perf_pmu_disable(struct pmu *pmu)
 {
-       struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
-
-       if (ddr_pmu->events[EVENT_CYCLES_COUNTER] == NULL)
-               ddr_perf_counter_enable(ddr_pmu,
-                                     EVENT_CYCLES_ID,
-                                     EVENT_CYCLES_COUNTER,
-                                     false);
 }
 
 static int ddr_perf_init(struct ddr_pmu *pmu, void __iomem *base,
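The i.MX8MP hunks above bias the cycle counter via the COUNTER_CNTL Counter Parameter (CP) field so it overflows more often, then strip the bias again in ddr_perf_event_update(). A standalone C99 sketch of that mask arithmetic — assuming, as CYCLES_COUNTER_MASK implies, that CP = 0xf0 surfaces in the counter's top nibble:

#include <stdint.h>
#include <stdio.h>

#define CNTL_CP_SHIFT        16
#define CNTL_CP_MASK         (0xFFu << CNTL_CP_SHIFT)
#define CYCLES_COUNTER_MASK  0x0FFFFFFFu

int main(void)
{
        uint32_t cntl = 0;
        uint32_t raw;

        /* FIELD_PREP(CNTL_CP_MASK, 0xf0) equivalent: 0xf0 into bits [23:16] */
        cntl |= (0xf0u << CNTL_CP_SHIFT) & CNTL_CP_MASK;
        printf("CNTL     = 0x%08x\n", cntl);    /* 0x00f00000 */

        /* A biased raw count: the top nibble carries the 0xf bias */
        raw = 0xf0001234u;
        printf("debiased = 0x%08x\n", raw & CYCLES_COUNTER_MASK);  /* 0x00001234 */
        return 0;
}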
index 71d5b07..5cf770a 100644 (file)
@@ -7,9 +7,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/platform_device.h>
 #include <linux/perf_event.h>
 
 /* Performance monitor configuration */
index e10fc7c..5a00adb 100644 (file)
@@ -665,8 +665,8 @@ static int hisi_pcie_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
        struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
 
        if (pcie_pmu->on_cpu == -1) {
-               pcie_pmu->on_cpu = cpu;
-               WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(cpu)));
+               pcie_pmu->on_cpu = cpumask_local_spread(0, dev_to_node(&pcie_pmu->pdev->dev));
+               WARN_ON(irq_set_affinity(pcie_pmu->irq, cpumask_of(pcie_pmu->on_cpu)));
        }
 
        return 0;
@@ -676,14 +676,23 @@ static int hisi_pcie_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
 {
        struct hisi_pcie_pmu *pcie_pmu = hlist_entry_safe(node, struct hisi_pcie_pmu, node);
        unsigned int target;
+       cpumask_t mask;
+       int numa_node;
 
        /* Nothing to do if this CPU doesn't own the PMU */
        if (pcie_pmu->on_cpu != cpu)
                return 0;
 
        pcie_pmu->on_cpu = -1;
-       /* Choose a new CPU from all online cpus. */
-       target = cpumask_any_but(cpu_online_mask, cpu);
+
+       /* Choose a local CPU from all online cpus. */
+       numa_node = dev_to_node(&pcie_pmu->pdev->dev);
+       if (cpumask_and(&mask, cpumask_of_node(numa_node), cpu_online_mask) &&
+           cpumask_andnot(&mask, &mask, cpumask_of(cpu)))
+               target = cpumask_any(&mask);
+       else
+               target = cpumask_any_but(cpu_online_mask, cpu);
+
        if (target >= nr_cpu_ids) {
                pci_err(pcie_pmu->pdev, "There is no CPU to set\n");
                return 0;
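The hisi_pcie_pmu change above schedules the PMU context on a CPU local to the device's NUMA node, falling back to any other online CPU only when the node has none left. A minimal userspace model of that selection policy, with cpumask_t replaced by a u64 bitmap (illustrative only, not the kernel API):

#include <stdint.h>
#include <stdio.h>

static int pick_target(uint64_t node_mask, uint64_t online_mask, int departing)
{
        /* cpumask_and + cpumask_andnot analogue: local, online, not leaving */
        uint64_t mask = node_mask & online_mask & ~(1ull << departing);

        if (!mask)      /* no local CPU left: any other online one */
                mask = online_mask & ~(1ull << departing);
        if (!mask)
                return -1;      /* the "There is no CPU to set" case */
        return __builtin_ctzll(mask);   /* cpumask_any analogue */
}

int main(void)
{
        /* CPUs 0-3 on the device's node, CPUs 0-7 online, CPU 2 going down */
        printf("target = %d\n", pick_target(0x0f, 0xff, 2));    /* -> 0 */
        return 0;
}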
index b94a5f6..524ba82 100644 (file)
@@ -8,11 +8,10 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/perf_event.h>
 #include <linux/hrtimer.h>
 #include <linux/acpi.h>
+#include <linux/platform_device.h>
 
 /* Performance Counters Operating Mode Control Registers */
 #define DDRC_PERF_CNT_OP_MODE_CTRL     0x8020
index 3972197..fec8e82 100644 (file)
@@ -6,10 +6,9 @@
 
 #define pr_fmt(fmt) "tad_pmu: " fmt
 
+#include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
 #include <linux/cpuhotplug.h>
 #include <linux/perf_event.h>
 #include <linux/platform_device.h>
index ebca5ea..56897d4 100644 (file)
@@ -181,9 +181,6 @@ void riscv_pmu_start(struct perf_event *event, int flags)
        uint64_t max_period = riscv_pmu_ctr_get_width_mask(event);
        u64 init_val;
 
-       if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
-               return;
-
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
 
index 0c32dff..9972bfc 100644 (file)
@@ -1833,7 +1833,6 @@ static int xgene_pmu_probe(struct platform_device *pdev)
        const struct xgene_pmu_data *dev_data;
        const struct of_device_id *of_id;
        struct xgene_pmu *xgene_pmu;
-       struct resource *res;
        int irq, rc;
        int version;
 
@@ -1883,8 +1882,7 @@ static int xgene_pmu_probe(struct platform_device *pdev)
        xgene_pmu->version = version;
        dev_info(&pdev->dev, "X-Gene PMU version %d\n", xgene_pmu->version);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       xgene_pmu->pcppmu_csr = devm_ioremap_resource(&pdev->dev, res);
+       xgene_pmu->pcppmu_csr = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(xgene_pmu->pcppmu_csr)) {
                dev_err(&pdev->dev, "ioremap failed for PCP PMU resource\n");
                return PTR_ERR(xgene_pmu->pcppmu_csr);
index 15dafe3..6ae6d50 100644 (file)
@@ -184,7 +184,7 @@ static int hisi_inno_phy_probe(struct platform_device *pdev)
                phy_set_drvdata(phy, &priv->ports[i]);
                i++;
 
-               if (i > INNO_PHY_PORT_NUM) {
+               if (i >= INNO_PHY_PORT_NUM) {
                        dev_warn(dev, "Support %d ports in maximum\n", i);
                        of_node_put(child);
                        break;
index 232fd3f..d7024a1 100644 (file)
@@ -169,7 +169,7 @@ static int mtk_dp_phy_probe(struct platform_device *pdev)
 
        regs = *(struct regmap **)dev->platform_data;
        if (!regs)
-               return dev_err_probe(dev, EINVAL,
+               return dev_err_probe(dev, -EINVAL,
                                     "No data passed, requires struct regmap**\n");
 
        dp_phy = devm_kzalloc(dev, sizeof(*dp_phy), GFP_KERNEL);
index 8aa7251..bbfe11d 100644 (file)
@@ -253,7 +253,7 @@ static int mtk_hdmi_pll_calc(struct mtk_hdmi_phy *hdmi_phy, struct clk_hw *hw,
        for (i = 0; i < ARRAY_SIZE(txpredivs); i++) {
                ns_hdmipll_ck = 5 * tmds_clk * txposdiv * txpredivs[i];
                if (ns_hdmipll_ck >= 5 * GIGA &&
-                   ns_hdmipll_ck <= 1 * GIGA)
+                   ns_hdmipll_ck <= 12 * GIGA)
                        break;
        }
        if (i == (ARRAY_SIZE(txpredivs) - 1) &&
index 6c237f3..d0319be 100644 (file)
@@ -110,11 +110,13 @@ struct phy_override_seq {
 /**
  * struct qcom_snps_hsphy - snps hs phy attributes
  *
+ * @dev: device structure
+ *
  * @phy: generic phy
  * @base: iomapped memory space for snps hs phy
  *
- * @cfg_ahb_clk: AHB2PHY interface clock
- * @ref_clk: phy reference clock
+ * @num_clks: number of clocks
+ * @clks: array of clocks
  * @phy_reset: phy reset control
  * @vregs: regulator supplies bulk data
  * @phy_initialized: if PHY has been initialized correctly
@@ -122,11 +124,13 @@ struct phy_override_seq {
  * @update_seq_cfg: tuning parameters for phy init
  */
 struct qcom_snps_hsphy {
+       struct device *dev;
+
        struct phy *phy;
        void __iomem *base;
 
-       struct clk *cfg_ahb_clk;
-       struct clk *ref_clk;
+       int num_clks;
+       struct clk_bulk_data *clks;
        struct reset_control *phy_reset;
        struct regulator_bulk_data vregs[SNPS_HS_NUM_VREGS];
 
@@ -135,6 +139,34 @@ struct qcom_snps_hsphy {
        struct phy_override_seq update_seq_cfg[NUM_HSPHY_TUNING_PARAMS];
 };
 
+static int qcom_snps_hsphy_clk_init(struct qcom_snps_hsphy *hsphy)
+{
+       struct device *dev = hsphy->dev;
+
+       hsphy->num_clks = 2;
+       hsphy->clks = devm_kcalloc(dev, hsphy->num_clks, sizeof(*hsphy->clks), GFP_KERNEL);
+       if (!hsphy->clks)
+               return -ENOMEM;
+
+       /*
+        * TODO: Currently no device tree instantiation of the PHY is using the clock.
+        * This needs to be fixed in order for this code to be able to use devm_clk_bulk_get().
+        */
+       hsphy->clks[0].id = "cfg_ahb";
+       hsphy->clks[0].clk = devm_clk_get_optional(dev, "cfg_ahb");
+       if (IS_ERR(hsphy->clks[0].clk))
+               return dev_err_probe(dev, PTR_ERR(hsphy->clks[0].clk),
+                                    "failed to get cfg_ahb clk\n");
+
+       hsphy->clks[1].id = "ref";
+       hsphy->clks[1].clk = devm_clk_get(dev, "ref");
+       if (IS_ERR(hsphy->clks[1].clk))
+               return dev_err_probe(dev, PTR_ERR(hsphy->clks[1].clk),
+                                    "failed to get ref clk\n");
+
+       return 0;
+}
+
 static inline void qcom_snps_hsphy_write_mask(void __iomem *base, u32 offset,
                                                u32 mask, u32 val)
 {
@@ -165,22 +197,13 @@ static int qcom_snps_hsphy_suspend(struct qcom_snps_hsphy *hsphy)
                                           0, USB2_AUTO_RESUME);
        }
 
-       clk_disable_unprepare(hsphy->cfg_ahb_clk);
        return 0;
 }
 
 static int qcom_snps_hsphy_resume(struct qcom_snps_hsphy *hsphy)
 {
-       int ret;
-
        dev_dbg(&hsphy->phy->dev, "Resume QCOM SNPS PHY, mode\n");
 
-       ret = clk_prepare_enable(hsphy->cfg_ahb_clk);
-       if (ret) {
-               dev_err(&hsphy->phy->dev, "failed to enable cfg ahb clock\n");
-               return ret;
-       }
-
        return 0;
 }
 
@@ -191,8 +214,7 @@ static int __maybe_unused qcom_snps_hsphy_runtime_suspend(struct device *dev)
        if (!hsphy->phy_initialized)
                return 0;
 
-       qcom_snps_hsphy_suspend(hsphy);
-       return 0;
+       return qcom_snps_hsphy_suspend(hsphy);
 }
 
 static int __maybe_unused qcom_snps_hsphy_runtime_resume(struct device *dev)
@@ -202,8 +224,7 @@ static int __maybe_unused qcom_snps_hsphy_runtime_resume(struct device *dev)
        if (!hsphy->phy_initialized)
                return 0;
 
-       qcom_snps_hsphy_resume(hsphy);
-       return 0;
+       return qcom_snps_hsphy_resume(hsphy);
 }
 
 static int qcom_snps_hsphy_set_mode(struct phy *phy, enum phy_mode mode,
@@ -374,16 +395,16 @@ static int qcom_snps_hsphy_init(struct phy *phy)
        if (ret)
                return ret;
 
-       ret = clk_prepare_enable(hsphy->cfg_ahb_clk);
+       ret = clk_bulk_prepare_enable(hsphy->num_clks, hsphy->clks);
        if (ret) {
-               dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret);
+               dev_err(&phy->dev, "failed to enable clocks, %d\n", ret);
                goto poweroff_phy;
        }
 
        ret = reset_control_assert(hsphy->phy_reset);
        if (ret) {
                dev_err(&phy->dev, "failed to assert phy_reset, %d\n", ret);
-               goto disable_ahb_clk;
+               goto disable_clks;
        }
 
        usleep_range(100, 150);
@@ -391,7 +412,7 @@ static int qcom_snps_hsphy_init(struct phy *phy)
        ret = reset_control_deassert(hsphy->phy_reset);
        if (ret) {
                dev_err(&phy->dev, "failed to de-assert phy_reset, %d\n", ret);
-               goto disable_ahb_clk;
+               goto disable_clks;
        }
 
        qcom_snps_hsphy_write_mask(hsphy->base, USB2_PHY_USB_PHY_CFG0,
@@ -448,8 +469,8 @@ static int qcom_snps_hsphy_init(struct phy *phy)
 
        return 0;
 
-disable_ahb_clk:
-       clk_disable_unprepare(hsphy->cfg_ahb_clk);
+disable_clks:
+       clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks);
 poweroff_phy:
        regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs);
 
@@ -461,7 +482,7 @@ static int qcom_snps_hsphy_exit(struct phy *phy)
        struct qcom_snps_hsphy *hsphy = phy_get_drvdata(phy);
 
        reset_control_assert(hsphy->phy_reset);
-       clk_disable_unprepare(hsphy->cfg_ahb_clk);
+       clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks);
        regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs);
        hsphy->phy_initialized = false;
 
@@ -554,14 +575,15 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev)
        if (!hsphy)
                return -ENOMEM;
 
+       hsphy->dev = dev;
+
        hsphy->base = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(hsphy->base))
                return PTR_ERR(hsphy->base);
 
-       hsphy->ref_clk = devm_clk_get(dev, "ref");
-       if (IS_ERR(hsphy->ref_clk))
-               return dev_err_probe(dev, PTR_ERR(hsphy->ref_clk),
-                                    "failed to get ref clk\n");
+       ret = qcom_snps_hsphy_clk_init(hsphy);
+       if (ret)
+               return dev_err_probe(dev, ret, "failed to initialize clocks\n");
 
        hsphy->phy_reset = devm_reset_control_get_exclusive(&pdev->dev, NULL);
        if (IS_ERR(hsphy->phy_reset)) {
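The qcom-snps-femto-v2 conversion above moves from two hand-managed clocks to a clk_bulk array, so the init/exit paths enable and disable them as one unit. A userspace model of the bulk pattern it relies on — enable in order, unwind in reverse on failure (the clk type and ops here are stand-ins, not the common clock framework):

#include <stdio.h>

struct clk_bulk_data { const char *id; int enabled; };

static int clk_enable_one(struct clk_bulk_data *c)   { c->enabled = 1; return 0; }
static void clk_disable_one(struct clk_bulk_data *c) { c->enabled = 0; }

static int bulk_enable(int n, struct clk_bulk_data *clks)
{
        for (int i = 0; i < n; i++) {
                int ret = clk_enable_one(&clks[i]);

                if (ret) {
                        while (--i >= 0)        /* unwind in reverse */
                                clk_disable_one(&clks[i]);
                        return ret;
                }
        }
        return 0;
}

static void bulk_disable(int n, struct clk_bulk_data *clks)
{
        while (--n >= 0)
                clk_disable_one(&clks[n]);
}

int main(void)
{
        struct clk_bulk_data clks[] = { { "cfg_ahb", 0 }, { "ref", 0 } };

        if (!bulk_enable(2, clks))
                printf("%s and %s enabled\n", clks[0].id, clks[1].id);
        bulk_disable(2, clks);
        return 0;
}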
index 3c4220b..4dff656 100644 (file)
@@ -116,21 +116,19 @@ static void amd_gpio_set_value(struct gpio_chip *gc, unsigned offset, int value)
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 }
 
-static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
-               unsigned debounce)
+static int amd_gpio_set_debounce(struct amd_gpio *gpio_dev, unsigned int offset,
+                                unsigned int debounce)
 {
        u32 time;
        u32 pin_reg;
        int ret = 0;
-       unsigned long flags;
-       struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
-
-       raw_spin_lock_irqsave(&gpio_dev->lock, flags);
 
        /* Use special handling for Pin0 debounce */
-       pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
-       if (pin_reg & INTERNAL_GPIO0_DEBOUNCE)
-               debounce = 0;
+       if (offset == 0) {
+               pin_reg = readl(gpio_dev->base + WAKE_INT_MASTER_REG);
+               if (pin_reg & INTERNAL_GPIO0_DEBOUNCE)
+                       debounce = 0;
+       }
 
        pin_reg = readl(gpio_dev->base + offset * 4);
 
@@ -182,23 +180,10 @@ static int amd_gpio_set_debounce(struct gpio_chip *gc, unsigned offset,
                pin_reg &= ~(DB_CNTRl_MASK << DB_CNTRL_OFF);
        }
        writel(pin_reg, gpio_dev->base + offset * 4);
-       raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
        return ret;
 }
 
-static int amd_gpio_set_config(struct gpio_chip *gc, unsigned offset,
-                              unsigned long config)
-{
-       u32 debounce;
-
-       if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
-               return -ENOTSUPP;
-
-       debounce = pinconf_to_config_argument(config);
-       return amd_gpio_set_debounce(gc, offset, debounce);
-}
-
 #ifdef CONFIG_DEBUG_FS
 static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
 {
@@ -220,7 +205,6 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
        char *pin_sts;
        char *interrupt_sts;
        char *wake_sts;
-       char *pull_up_sel;
        char *orientation;
        char debounce_value[40];
        char *debounce_enable;
@@ -328,14 +312,9 @@ static void amd_gpio_dbg_show(struct seq_file *s, struct gpio_chip *gc)
                        seq_printf(s, "   %s|", wake_sts);
 
                        if (pin_reg & BIT(PULL_UP_ENABLE_OFF)) {
-                               if (pin_reg & BIT(PULL_UP_SEL_OFF))
-                                       pull_up_sel = "8k";
-                               else
-                                       pull_up_sel = "4k";
-                               seq_printf(s, "%s ↑|",
-                                          pull_up_sel);
+                               seq_puts(s, "  ↑ |");
                        } else if (pin_reg & BIT(PULL_DOWN_ENABLE_OFF)) {
-                               seq_puts(s, "   ↓|");
+                               seq_puts(s, "  ↓ |");
                        } else  {
                                seq_puts(s, "    |");
                        }
@@ -761,7 +740,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev,
                break;
 
        case PIN_CONFIG_BIAS_PULL_UP:
-               arg = (pin_reg >> PULL_UP_SEL_OFF) & (BIT(0) | BIT(1));
+               arg = (pin_reg >> PULL_UP_ENABLE_OFF) & BIT(0);
                break;
 
        case PIN_CONFIG_DRIVE_STRENGTH:
@@ -780,7 +759,7 @@ static int amd_pinconf_get(struct pinctrl_dev *pctldev,
 }
 
 static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
-                               unsigned long *configs, unsigned num_configs)
+                          unsigned long *configs, unsigned int num_configs)
 {
        int i;
        u32 arg;
@@ -798,9 +777,8 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
 
                switch (param) {
                case PIN_CONFIG_INPUT_DEBOUNCE:
-                       pin_reg &= ~DB_TMR_OUT_MASK;
-                       pin_reg |= arg & DB_TMR_OUT_MASK;
-                       break;
+                       ret = amd_gpio_set_debounce(gpio_dev, pin, arg);
+                       goto out_unlock;
 
                case PIN_CONFIG_BIAS_PULL_DOWN:
                        pin_reg &= ~BIT(PULL_DOWN_ENABLE_OFF);
@@ -808,10 +786,8 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
                        break;
 
                case PIN_CONFIG_BIAS_PULL_UP:
-                       pin_reg &= ~BIT(PULL_UP_SEL_OFF);
-                       pin_reg |= (arg & BIT(0)) << PULL_UP_SEL_OFF;
                        pin_reg &= ~BIT(PULL_UP_ENABLE_OFF);
-                       pin_reg |= ((arg>>1) & BIT(0)) << PULL_UP_ENABLE_OFF;
+                       pin_reg |= (arg & BIT(0)) << PULL_UP_ENABLE_OFF;
                        break;
 
                case PIN_CONFIG_DRIVE_STRENGTH:
@@ -829,6 +805,7 @@ static int amd_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
 
                writel(pin_reg, gpio_dev->base + pin*4);
        }
+out_unlock:
        raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
 
        return ret;
@@ -870,6 +847,14 @@ static int amd_pinconf_group_set(struct pinctrl_dev *pctldev,
        return 0;
 }
 
+static int amd_gpio_set_config(struct gpio_chip *gc, unsigned int pin,
+                              unsigned long config)
+{
+       struct amd_gpio *gpio_dev = gpiochip_get_data(gc);
+
+       return amd_pinconf_set(gpio_dev->pctrl, pin, &config, 1);
+}
+
 static const struct pinconf_ops amd_pinconf_ops = {
        .pin_config_get         = amd_pinconf_get,
        .pin_config_set         = amd_pinconf_set,
@@ -877,6 +862,33 @@ static const struct pinconf_ops amd_pinconf_ops = {
        .pin_config_group_set = amd_pinconf_group_set,
 };
 
+static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+{
+       struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+       unsigned long flags;
+       u32 pin_reg, mask;
+       int i;
+
+       mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
+               BIT(WAKE_CNTRL_OFF_S4);
+
+       for (i = 0; i < desc->npins; i++) {
+               int pin = desc->pins[i].number;
+               const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+
+               if (!pd)
+                       continue;
+
+               raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+
+               pin_reg = readl(gpio_dev->base + pin * 4);
+               pin_reg &= ~mask;
+               writel(pin_reg, gpio_dev->base + pin * 4);
+
+               raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+       }
+}
+
 #ifdef CONFIG_PM_SLEEP
 static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin)
 {
@@ -1114,6 +1126,9 @@ static int amd_gpio_probe(struct platform_device *pdev)
                return PTR_ERR(gpio_dev->pctrl);
        }
 
+       /* Disable and mask interrupts */
+       amd_gpio_irq_init(gpio_dev);
+
        girq = &gpio_dev->gc.irq;
        gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip);
        /* This will let us handle the parent IRQ in the driver */
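The pinctrl-amd refactor above funnels the GPIO chip's set_config through amd_pinconf_set(), so the register lock is taken once and debounce becomes just another pinconf parameter handled under it. A simplified model of that call shape (hypothetical names; the pinctrl framework itself is not modeled):

#include <stdio.h>

enum param { INPUT_DEBOUNCE, BIAS_PULL_UP };

/* Mirrors the change to amd_gpio_set_debounce(): the caller holds the lock */
static int set_debounce_locked(unsigned int pin, unsigned int usec)
{
        printf("pin %u: debounce %u us\n", pin, usec);
        return 0;
}

static int pinconf_set(unsigned int pin, enum param p, unsigned int arg)
{
        int ret = 0;

        /* raw_spin_lock_irqsave() taken once here in the driver */
        switch (p) {
        case INPUT_DEBOUNCE:
                ret = set_debounce_locked(pin, arg);
                break;
        case BIAS_PULL_UP:
                printf("pin %u: pull-up %s\n", pin, arg ? "on" : "off");
                break;
        }
        /* ...and released once here */
        return ret;
}

/* The GPIO-side set_config is now a thin wrapper over the pinconf path */
static int gpio_set_config(unsigned int pin, enum param p, unsigned int arg)
{
        return pinconf_set(pin, p, arg);
}

int main(void)
{
        return gpio_set_config(0, INPUT_DEBOUNCE, 976);
}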
index 1cf2d06..34c5c3e 100644 (file)
@@ -36,7 +36,6 @@
 #define WAKE_CNTRL_OFF_S4               15
 #define PIN_STS_OFF                    16
 #define DRV_STRENGTH_SEL_OFF           17
-#define PULL_UP_SEL_OFF                        19
 #define PULL_UP_ENABLE_OFF             20
 #define PULL_DOWN_ENABLE_OFF           21
 #define OUTPUT_VALUE_OFF               22
index 2585ef2..115b83e 100644 (file)
@@ -1038,6 +1038,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
        struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
        struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
        const struct msm_pingroup *g;
+       u32 intr_target_mask = GENMASK(2, 0);
        unsigned long flags;
        bool was_enabled;
        u32 val;
@@ -1074,13 +1075,15 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
         * With intr_target_use_scm interrupts are routed to
         * application cpu using scm calls.
         */
+       if (g->intr_target_width)
+               intr_target_mask = GENMASK(g->intr_target_width - 1, 0);
+
        if (pctrl->intr_target_use_scm) {
                u32 addr = pctrl->phys_base[0] + g->intr_target_reg;
                int ret;
 
                qcom_scm_io_readl(addr, &val);
-
-               val &= ~(7 << g->intr_target_bit);
+               val &= ~(intr_target_mask << g->intr_target_bit);
                val |= g->intr_target_kpss_val << g->intr_target_bit;
 
                ret = qcom_scm_io_writel(addr, val);
@@ -1090,7 +1093,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
                                d->hwirq);
        } else {
                val = msm_readl_intr_target(pctrl, g);
-               val &= ~(7 << g->intr_target_bit);
+               val &= ~(intr_target_mask << g->intr_target_bit);
                val |= g->intr_target_kpss_val << g->intr_target_bit;
                msm_writel_intr_target(val, pctrl, g);
        }
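The pinctrl-msm change above replaces the hard-coded 3-bit routing mask (the old literal 7) with one derived from intr_target_width when a platform sets it. A plain-C sketch of the mask computation (GENMASK below is a userspace stand-in for the kernel macro):

#include <stdint.h>
#include <stdio.h>

#define GENMASK(h, l)   ((~0u >> (31 - (h))) & ~((1u << (l)) - 1))

static uint32_t update_target(uint32_t val, unsigned int width,
                              unsigned int bit, uint32_t kpss_val)
{
        /* width == 0 keeps the historical 3-bit field, GENMASK(2, 0) == 7 */
        uint32_t mask = GENMASK(width ? width - 1 : 2, 0);

        val &= ~(mask << bit);  /* clear the routing field */
        val |= kpss_val << bit; /* route to the KPSS target */
        return val;
}

int main(void)
{
        /* 4-bit field at bit 5, target value 3: the defaults added above */
        printf("val = 0x%08x\n", update_target(0xffffffffu, 4, 5, 3));
        return 0;
}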
index 5e4410b..1d2f2e9 100644 (file)
@@ -59,6 +59,7 @@ struct pinctrl_pin_desc;
  * @intr_status_bit:      Offset in @intr_status_reg for reading and acking the interrupt
  *                        status.
  * @intr_target_bit:      Offset in @intr_target_reg for configuring the interrupt routing.
+ * @intr_target_width:    Number of bits used for specifying interrupt routing target.
  * @intr_target_kpss_val: Value in @intr_target_bit for specifying that the interrupt from
  *                        this gpio should get routed to the KPSS processor.
  * @intr_raw_status_bit:  Offset in @intr_cfg_reg for the raw status bit.
@@ -100,6 +101,7 @@ struct msm_pingroup {
        unsigned intr_ack_high:1;
 
        unsigned intr_target_bit:5;
+       unsigned intr_target_width:5;
        unsigned intr_target_kpss_val:5;
        unsigned intr_raw_status_bit:5;
        unsigned intr_polarity_bit:5;
index 8a5cd15..8fdea25 100644 (file)
@@ -46,6 +46,7 @@
                .intr_enable_bit = 0,           \
                .intr_status_bit = 0,           \
                .intr_target_bit = 5,           \
+               .intr_target_width = 4,         \
                .intr_target_kpss_val = 3,      \
                .intr_raw_status_bit = 4,       \
                .intr_polarity_bit = 1,         \
index 40b1326..5591ddf 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/pinctrl/pinmux.h>
 
@@ -46,6 +47,7 @@ struct rza2_pinctrl_priv {
        struct pinctrl_dev *pctl;
        struct pinctrl_gpio_range gpio_range;
        int npins;
+       struct mutex mutex; /* serialize adding groups and functions */
 };
 
 #define RZA2_PDR(port)         (0x0000 + (port) * 2)   /* Direction 16-bit */
@@ -358,10 +360,14 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
                psel_val[i] = MUX_FUNC(value);
        }
 
+       mutex_lock(&priv->mutex);
+
        /* Register a single pin group listing all the pins we read from DT */
        gsel = pinctrl_generic_add_group(pctldev, np->name, pins, npins, NULL);
-       if (gsel < 0)
-               return gsel;
+       if (gsel < 0) {
+               ret = gsel;
+               goto unlock;
+       }
 
        /*
         * Register a single group function where the 'data' is an array PSEL
@@ -390,6 +396,8 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
        (*map)->data.mux.function = np->name;
        *num_maps = 1;
 
+       mutex_unlock(&priv->mutex);
+
        return 0;
 
 remove_function:
@@ -398,6 +406,9 @@ remove_function:
 remove_group:
        pinctrl_generic_remove_group(pctldev, gsel);
 
+unlock:
+       mutex_unlock(&priv->mutex);
+
        dev_err(priv->dev, "Unable to parse DT node %s\n", np->name);
 
        return ret;
@@ -473,6 +484,8 @@ static int rza2_pinctrl_probe(struct platform_device *pdev)
        if (IS_ERR(priv->base))
                return PTR_ERR(priv->base);
 
+       mutex_init(&priv->mutex);
+
        platform_set_drvdata(pdev, priv);
 
        priv->npins = (int)(uintptr_t)of_device_get_match_data(&pdev->dev) *
index 9511d92..6e8a765 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/seq_file.h>
@@ -149,10 +150,11 @@ struct rzg2l_pinctrl {
        struct gpio_chip                gpio_chip;
        struct pinctrl_gpio_range       gpio_range;
        DECLARE_BITMAP(tint_slot, RZG2L_TINT_MAX_INTERRUPT);
-       spinlock_t                      bitmap_lock;
+       spinlock_t                      bitmap_lock; /* protect tint_slot bitmap */
        unsigned int                    hwirq[RZG2L_TINT_MAX_INTERRUPT];
 
-       spinlock_t                      lock;
+       spinlock_t                      lock; /* lock read/write registers */
+       struct mutex                    mutex; /* serialize adding groups and functions */
 };
 
 static const unsigned int iolh_groupa_mA[] = { 2, 4, 8, 12 };
@@ -249,6 +251,7 @@ static int rzg2l_map_add_config(struct pinctrl_map *map,
 
 static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                                   struct device_node *np,
+                                  struct device_node *parent,
                                   struct pinctrl_map **map,
                                   unsigned int *num_maps,
                                   unsigned int *index)
@@ -266,6 +269,7 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
        struct property *prop;
        int ret, gsel, fsel;
        const char **pin_fn;
+       const char *name;
        const char *pin;
 
        pinmux = of_find_property(np, "pinmux", NULL);
@@ -349,28 +353,42 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                psel_val[i] = MUX_FUNC(value);
        }
 
+       if (parent) {
+               name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn",
+                                     parent, np);
+               if (!name) {
+                       ret = -ENOMEM;
+                       goto done;
+               }
+       } else {
+               name = np->name;
+       }
+
+       mutex_lock(&pctrl->mutex);
+
        /* Register a single pin group listing all the pins we read from DT */
-       gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
+       gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
        if (gsel < 0) {
                ret = gsel;
-               goto done;
+               goto unlock;
        }
 
        /*
         * Register a single group function where the 'data' is an array PSEL
         * register values read from DT.
         */
-       pin_fn[0] = np->name;
-       fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
-                                          psel_val);
+       pin_fn[0] = name;
+       fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val);
        if (fsel < 0) {
                ret = fsel;
                goto remove_group;
        }
 
+       mutex_unlock(&pctrl->mutex);
+
        maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
-       maps[idx].data.mux.group = np->name;
-       maps[idx].data.mux.function = np->name;
+       maps[idx].data.mux.group = name;
+       maps[idx].data.mux.function = name;
        idx++;
 
        dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
@@ -379,6 +397,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 
 remove_group:
        pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+       mutex_unlock(&pctrl->mutex);
 done:
        *index = idx;
        kfree(configs);
@@ -417,7 +437,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
        index = 0;
 
        for_each_child_of_node(np, child) {
-               ret = rzg2l_dt_subnode_to_map(pctldev, child, map,
+               ret = rzg2l_dt_subnode_to_map(pctldev, child, np, map,
                                              num_maps, &index);
                if (ret < 0) {
                        of_node_put(child);
@@ -426,7 +446,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
        }
 
        if (*num_maps == 0) {
-               ret = rzg2l_dt_subnode_to_map(pctldev, np, map,
+               ret = rzg2l_dt_subnode_to_map(pctldev, np, NULL, map,
                                              num_maps, &index);
                if (ret < 0)
                        goto done;
@@ -1497,6 +1517,7 @@ static int rzg2l_pinctrl_probe(struct platform_device *pdev)
 
        spin_lock_init(&pctrl->lock);
        spin_lock_init(&pctrl->bitmap_lock);
+       mutex_init(&pctrl->mutex);
 
        platform_set_drvdata(pdev, pctrl);
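Besides the locking, the rzg2l parser above (and the rzv2m one that follows) now qualifies subnode group names with the parent node via "%pOFn.%pOFn", so identically named children of different parents no longer collide in the pinctrl core. A sketch of the naming rule, with hypothetical node names standing in for the %pOFn output:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *make_group_name(const char *parent, const char *child)
{
        size_t len;
        char *name;

        if (!parent)                    /* top-level node keeps np->name */
                return strdup(child);

        len = strlen(parent) + strlen(child) + 2;
        name = malloc(len);             /* devm_kasprintf() analogue */
        if (name)
                snprintf(name, len, "%s.%s", parent, child);
        return name;
}

int main(void)
{
        char *a = make_group_name("i2c0", "pins");
        char *b = make_group_name("i2c1", "pins");

        printf("%s vs %s\n", a, b);     /* i2c0.pins vs i2c1.pins */
        free(a);
        free(b);
        return 0;
}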
 
index e547229..9146101 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/of_device.h>
 #include <linux/spinlock.h>
 
@@ -123,7 +124,8 @@ struct rzv2m_pinctrl {
        struct gpio_chip                gpio_chip;
        struct pinctrl_gpio_range       gpio_range;
 
-       spinlock_t                      lock;
+       spinlock_t                      lock; /* lock read/write registers */
+       struct mutex                    mutex; /* serialize adding groups and functions */
 };
 
 static const unsigned int drv_1_8V_group2_uA[] = { 1800, 3800, 7800, 11000 };
@@ -209,6 +211,7 @@ static int rzv2m_map_add_config(struct pinctrl_map *map,
 
 static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                                   struct device_node *np,
+                                  struct device_node *parent,
                                   struct pinctrl_map **map,
                                   unsigned int *num_maps,
                                   unsigned int *index)
@@ -226,6 +229,7 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
        struct property *prop;
        int ret, gsel, fsel;
        const char **pin_fn;
+       const char *name;
        const char *pin;
 
        pinmux = of_find_property(np, "pinmux", NULL);
@@ -309,28 +313,42 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
                psel_val[i] = MUX_FUNC(value);
        }
 
+       if (parent) {
+               name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn",
+                                     parent, np);
+               if (!name) {
+                       ret = -ENOMEM;
+                       goto done;
+               }
+       } else {
+               name = np->name;
+       }
+
+       mutex_lock(&pctrl->mutex);
+
        /* Register a single pin group listing all the pins we read from DT */
-       gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
+       gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
        if (gsel < 0) {
                ret = gsel;
-               goto done;
+               goto unlock;
        }
 
        /*
         * Register a single group function where the 'data' is an array PSEL
         * register values read from DT.
         */
-       pin_fn[0] = np->name;
-       fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
-                                          psel_val);
+       pin_fn[0] = name;
+       fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val);
        if (fsel < 0) {
                ret = fsel;
                goto remove_group;
        }
 
+       mutex_unlock(&pctrl->mutex);
+
        maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
-       maps[idx].data.mux.group = np->name;
-       maps[idx].data.mux.function = np->name;
+       maps[idx].data.mux.group = name;
+       maps[idx].data.mux.function = name;
        idx++;
 
        dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
@@ -339,6 +357,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
 
 remove_group:
        pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+       mutex_unlock(&pctrl->mutex);
 done:
        *index = idx;
        kfree(configs);
@@ -377,7 +397,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev,
        index = 0;
 
        for_each_child_of_node(np, child) {
-               ret = rzv2m_dt_subnode_to_map(pctldev, child, map,
+               ret = rzv2m_dt_subnode_to_map(pctldev, child, np, map,
                                              num_maps, &index);
                if (ret < 0) {
                        of_node_put(child);
@@ -386,7 +406,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev,
        }
 
        if (*num_maps == 0) {
-               ret = rzv2m_dt_subnode_to_map(pctldev, np, map,
+               ret = rzv2m_dt_subnode_to_map(pctldev, np, NULL, map,
                                              num_maps, &index);
                if (ret < 0)
                        goto done;
@@ -1059,6 +1079,7 @@ static int rzv2m_pinctrl_probe(struct platform_device *pdev)
        }
 
        spin_lock_init(&pctrl->lock);
+       mutex_init(&pctrl->mutex);
 
        platform_set_drvdata(pdev, pctrl);
 
index 50d8a4d..e6e6dcf 100644 (file)
@@ -90,7 +90,36 @@ static int chromeos_acpi_handle_package(struct device *dev, union acpi_object *o
        case ACPI_TYPE_STRING:
                return sysfs_emit(buf, "%s\n", element->string.pointer);
        case ACPI_TYPE_BUFFER:
-               return sysfs_emit(buf, "%s\n", element->buffer.pointer);
+               {
+                       int i, r, at, room_left;
+                       const int byte_per_line = 16;
+
+                       at = 0;
+                       room_left = PAGE_SIZE - 1;
+                       for (i = 0; i < element->buffer.length && room_left; i += byte_per_line) {
+                               r = hex_dump_to_buffer(element->buffer.pointer + i,
+                                                      element->buffer.length - i,
+                                                      byte_per_line, 1, buf + at, room_left,
+                                                      false);
+                               if (r > room_left)
+                                       goto truncating;
+                               at += r;
+                               room_left -= r;
+
+                               r = sysfs_emit_at(buf, at, "\n");
+                               if (!r)
+                                       goto truncating;
+                               at += r;
+                               room_left -= r;
+                       }
+
+                       buf[at] = 0;
+                       return at;
+truncating:
+                       dev_info_once(dev, "truncating sysfs content for %s\n", name);
+                       sysfs_emit_at(buf, PAGE_SIZE - 4, "..\n");
+                       return PAGE_SIZE - 1;
+               }
        default:
                dev_err(dev, "element type %d not supported\n", element->type);
                return -EINVAL;
@@ -235,9 +264,9 @@ static int chromeos_acpi_device_probe(struct platform_device *pdev)
        return 0;
 }
 
-/* GGL is valid PNP ID of Google. PNP ID can be used with the ACPI devices. */
 static const struct acpi_device_id chromeos_device_ids[] = {
        { "GGL0001", 0 },
+       { "GOOG0016", 0 },
        {}
 };
 MODULE_DEVICE_TABLE(acpi, chromeos_device_ids);
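The chromeos_acpi hunk above renders ACPI_TYPE_BUFFER objects as a 16-bytes-per-line hex dump while staying inside the one-page sysfs limit, ending with ".." when the buffer would overflow the page. A self-contained sketch of the same pagination, with hex_line() standing in for the kernel's hex_dump_to_buffer():

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096

/* Formats up to 16 bytes as space-separated hex; like snprintf(), returns
 * the length that would have been written even if it did not fit. */
static int hex_line(const unsigned char *p, int len, char *out, int room)
{
        int n = 0;

        for (int i = 0; i < len && i < 16; i++)
                n += snprintf(out + n, n < room ? room - n : 0, "%s%02x",
                              i ? " " : "", p[i]);
        return n;
}

static int dump_to_page(const unsigned char *buf, int len, char *page)
{
        int at = 0, room_left = PAGE_SIZE - 1;

        for (int i = 0; i < len && room_left; i += 16) {
                int r = hex_line(buf + i, len - i, page + at, room_left);

                if (r + 1 > room_left)  /* the line plus its '\n' must fit */
                        goto truncating;
                at += r;
                page[at++] = '\n';
                room_left -= r + 1;
        }
        page[at] = '\0';
        return at;
truncating:
        memcpy(page + PAGE_SIZE - 4, "..\n", 4);        /* includes the NUL */
        return PAGE_SIZE - 1;
}

int main(void)
{
        static char page[PAGE_SIZE];
        unsigned char data[40];

        for (int i = 0; i < 40; i++)
                data[i] = (unsigned char)i;
        dump_to_page(data, sizeof(data), page);
        fputs(page, stdout);
        return 0;
}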
index 500a61b..3565724 100644 (file)
@@ -327,8 +327,8 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
                dev_emerg(ec_dev->dev, "CrOS EC Panic Reported. Shutdown is imminent!");
                blocking_notifier_call_chain(&ec_dev->panic_notifier, 0, ec_dev);
                kobject_uevent_env(&ec_dev->dev->kobj, KOBJ_CHANGE, (char **)env);
-               /* Begin orderly shutdown. Force shutdown after 1 second. */
-               hw_protection_shutdown("CrOS EC Panic", 1000);
+               /* Begin orderly shutdown. EC will force reset after a short period. */
+               hw_protection_shutdown("CrOS EC Panic", -1);
                /* Do not query for other events after a panic is reported */
                return;
        }
index a79318e..b600b77 100644 (file)
@@ -887,6 +887,7 @@ static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
                        tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
                        mlxbf_tmfifo_console_output(tm_vdev, vring);
                        spin_unlock_irqrestore(&fifo->spin_lock[0], flags);
+                       set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
                } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
                                            &fifo->pend_events)) {
                        return true;
index 2c22919..65732f0 100644 (file)
@@ -4,7 +4,7 @@
 # AMD x86 Platform-Specific Drivers
 #
 
-amd-pmc-y                      := pmc.o
+amd-pmc-y                      := pmc.o pmc-quirks.o
 obj-$(CONFIG_AMD_PMC)          += amd-pmc.o
 amd_hsmp-y                     := hsmp.o
 obj-$(CONFIG_AMD_HSMP)         += amd_hsmp.o
diff --git a/drivers/platform/x86/amd/pmc-quirks.c b/drivers/platform/x86/amd/pmc-quirks.c
new file mode 100644 (file)
index 0000000..ad70246
--- /dev/null
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD SoC Power Management Controller Driver Quirks
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <linux/dmi.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+
+#include "pmc.h"
+
+struct quirk_entry {
+       u32 s2idle_bug_mmio;
+};
+
+static struct quirk_entry quirk_s2idle_bug = {
+       .s2idle_bug_mmio = 0xfed80380,
+};
+
+static const struct dmi_system_id fwbug_list[] = {
+       {
+               .ident = "L14 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20X5"),
+               }
+       },
+       {
+               .ident = "T14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XF"),
+               }
+       },
+       {
+               .ident = "X13 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XH"),
+               }
+       },
+       {
+               .ident = "T14 Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20XK"),
+               }
+       },
+       {
+               .ident = "T14 Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UD"),
+               }
+       },
+       {
+               .ident = "T14 Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UE"),
+               }
+       },
+       {
+               .ident = "T14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UH"),
+               }
+       },
+       {
+               .ident = "T14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"),
+               }
+       },
+       {
+               .ident = "P14s Gen1 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"),
+               }
+       },
+       {
+               .ident = "P14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "21A0"),
+               }
+       },
+       {
+               .ident = "P14s Gen2 AMD",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "21A1"),
+               }
+       },
+       /* https://gitlab.freedesktop.org/drm/amd/-/issues/2684 */
+       {
+               .ident = "HP Laptop 15s-eq2xxx",
+               .driver_data = &quirk_s2idle_bug,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Laptop 15s-eq2xxx"),
+               }
+       },
+       {}
+};
+
+/*
+ * Some laptops run an SMI handler during the D3->D0 transition that
+ * occurs when exiting suspend to idle. With the IOMMU translation layer
+ * enabled (the default behavior), this can cause large delays during
+ * resume for NVME devices.
+ *
+ * To avoid this firmware problem, skip the SMI handler on these machines before the
+ * D0 transition occurs.
+ */
+static void amd_pmc_skip_nvme_smi_handler(u32 s2idle_bug_mmio)
+{
+       void __iomem *addr;
+       u8 val;
+
+       if (!request_mem_region_muxed(s2idle_bug_mmio, 1, "amd_pmc_pm80"))
+               return;
+
+       addr = ioremap(s2idle_bug_mmio, 1);
+       if (!addr)
+               goto cleanup_resource;
+
+       val = ioread8(addr);
+       iowrite8(val & ~BIT(0), addr);
+
+       iounmap(addr);
+cleanup_resource:
+       release_mem_region(s2idle_bug_mmio, 1);
+}
+
+void amd_pmc_process_restore_quirks(struct amd_pmc_dev *dev)
+{
+       if (dev->quirks && dev->quirks->s2idle_bug_mmio)
+               amd_pmc_skip_nvme_smi_handler(dev->quirks->s2idle_bug_mmio);
+}
+
+void amd_pmc_quirks_init(struct amd_pmc_dev *dev)
+{
+       const struct dmi_system_id *dmi_id;
+
+       dmi_id = dmi_first_match(fwbug_list);
+       if (!dmi_id)
+               return;
+       dev->quirks = dmi_id->driver_data;
+       if (dev->quirks->s2idle_bug_mmio)
+               pr_info("Using s2idle quirk to avoid %s platform firmware bug\n",
+                       dmi_id->ident);
+}
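The new quirks file keys everything off a first-match DMI scan whose driver_data carries the quirk description. A compact userspace model of that lookup (two illustrative entries, not the full table above):

#include <stdio.h>
#include <string.h>

struct quirk { unsigned int s2idle_bug_mmio; };

struct fwbug {
        const char *vendor, *product;
        const struct quirk *data;
};

static const struct quirk s2idle_bug = { .s2idle_bug_mmio = 0xfed80380 };

static const struct fwbug fwbugs[] = {
        { "LENOVO", "20X5", &s2idle_bug },
        { "LENOVO", "21A1", &s2idle_bug },
        { NULL, NULL, NULL }
};

/* dmi_first_match() analogue: the first fully matching entry wins */
static const struct quirk *first_match(const char *vendor, const char *product)
{
        for (const struct fwbug *f = fwbugs; f->vendor; f++)
                if (!strcmp(f->vendor, vendor) && !strcmp(f->product, product))
                        return f->data;
        return NULL;
}

int main(void)
{
        const struct quirk *q = first_match("LENOVO", "21A1");

        if (q)
                printf("quirk mmio: 0x%x\n", q->s2idle_bug_mmio);
        return 0;
}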
index 7d3d080..c1e788b 100644 (file)
@@ -28,6 +28,8 @@
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 
+#include "pmc.h"
+
 /* SMU communication registers */
 #define AMD_PMC_REGISTER_MESSAGE       0x538
 #define AMD_PMC_REGISTER_RESPONSE      0x980
@@ -94,6 +96,7 @@
 #define AMD_CPU_ID_CB                  0x14D8
 #define AMD_CPU_ID_PS                  0x14E8
 #define AMD_CPU_ID_SP                  0x14A4
+#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507
 
 #define PMC_MSG_DELAY_MIN_US           50
 #define RESPONSE_REGISTER_LOOP_MAX     20000
@@ -146,29 +149,6 @@ static const struct amd_pmc_bit_map soc15_ip_blk[] = {
        {}
 };
 
-struct amd_pmc_dev {
-       void __iomem *regbase;
-       void __iomem *smu_virt_addr;
-       void __iomem *stb_virt_addr;
-       void __iomem *fch_virt_addr;
-       bool msg_port;
-       u32 base_addr;
-       u32 cpu_id;
-       u32 active_ips;
-       u32 dram_size;
-       u32 num_ips;
-       u32 s2d_msg_id;
-/* SMU version information */
-       u8 smu_program;
-       u8 major;
-       u8 minor;
-       u8 rev;
-       struct device *dev;
-       struct pci_dev *rdev;
-       struct mutex lock; /* generic mutex lock */
-       struct dentry *dbgfs_dir;
-};
-
 static bool enable_stb;
 module_param(enable_stb, bool, 0644);
 MODULE_PARM_DESC(enable_stb, "Enable the STB debug mechanism");
@@ -891,6 +871,8 @@ static void amd_pmc_s2idle_restore(void)
 
        /* Notify on failed entry */
        amd_pmc_validate_deepest(pdev);
+
+       amd_pmc_process_restore_quirks(pdev);
 }
 
 static struct acpi_s2idle_dev_ops amd_pmc_s2idle_dev_ops = {
@@ -926,6 +908,7 @@ static const struct pci_device_id pmc_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RV) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_SP) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) },
        { }
 };
 
@@ -1087,6 +1070,8 @@ static int amd_pmc_probe(struct platform_device *pdev)
                err = acpi_register_lps0_dev(&amd_pmc_s2idle_dev_ops);
                if (err)
                        dev_warn(dev->dev, "failed to register LPS0 sleep handler, expect increased power consumption\n");
+               if (!disable_workarounds)
+                       amd_pmc_quirks_init(dev);
        }
 
        amd_pmc_dbgfs_register(dev);
@@ -1115,6 +1100,7 @@ static const struct acpi_device_id amd_pmc_acpi_ids[] = {
        {"AMDI0007", 0},
        {"AMDI0008", 0},
        {"AMDI0009", 0},
+       {"AMDI000A", 0},
        {"AMD0004", 0},
        {"AMD0005", 0},
        { }
diff --git a/drivers/platform/x86/amd/pmc.h b/drivers/platform/x86/amd/pmc.h
new file mode 100644 (file)
index 0000000..c27bd6a
--- /dev/null
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * AMD SoC Power Management Controller Driver
+ *
+ * Copyright (c) 2023, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#ifndef PMC_H
+#define PMC_H
+
+#include <linux/types.h>
+#include <linux/mutex.h>
+
+struct amd_pmc_dev {
+       void __iomem *regbase;
+       void __iomem *smu_virt_addr;
+       void __iomem *stb_virt_addr;
+       void __iomem *fch_virt_addr;
+       bool msg_port;
+       u32 base_addr;
+       u32 cpu_id;
+       u32 active_ips;
+       u32 dram_size;
+       u32 num_ips;
+       u32 s2d_msg_id;
+/* SMU version information */
+       u8 smu_program;
+       u8 major;
+       u8 minor;
+       u8 rev;
+       struct device *dev;
+       struct pci_dev *rdev;
+       struct mutex lock; /* generic mutex lock */
+       struct dentry *dbgfs_dir;
+       struct quirk_entry *quirks;
+};
+
+void amd_pmc_process_restore_quirks(struct amd_pmc_dev *dev);
+void amd_pmc_quirks_init(struct amd_pmc_dev *dev);
+
+#endif /* PMC_H */
index 081e84e..3fc5e45 100644 (file)
@@ -106,6 +106,27 @@ int apmf_get_static_slider_granular(struct amd_pmf_dev *pdev,
                                                                         data, sizeof(*data));
 }
 
+int apmf_os_power_slider_update(struct amd_pmf_dev *pdev, u8 event)
+{
+       struct os_power_slider args;
+       struct acpi_buffer params;
+       union acpi_object *info;
+       int err = 0;
+
+       args.size = sizeof(args);
+       args.slider_event = event;
+
+       params.length = sizeof(args);
+       params.pointer = (void *)&args;
+
+       info = apmf_if_call(pdev, APMF_FUNC_OS_POWER_SLIDER_UPDATE, &params);
+       if (!info)
+               err = -EIO;
+
+       kfree(info);
+       return err;
+}
+
 static void apmf_sbios_heartbeat_notify(struct work_struct *work)
 {
        struct amd_pmf_dev *dev = container_of(work, struct amd_pmf_dev, heart_beat.work);
@@ -289,7 +310,7 @@ int apmf_acpi_init(struct amd_pmf_dev *pmf_dev)
 
        ret = apmf_get_system_params(pmf_dev);
        if (ret) {
-               dev_err(pmf_dev->dev, "APMF apmf_get_system_params failed :%d\n", ret);
+               dev_dbg(pmf_dev->dev, "APMF apmf_get_system_params failed :%d\n", ret);
                goto out;
        }
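apmf_os_power_slider_update() above marshals its argument as a packed, size-prefixed structure and hands it to the APMF method as a raw buffer. A sketch of that layout — __attribute__((packed)) being the userspace spelling of the kernel's __packed; apmf_if_call() itself is not modeled and the event value is an example:

#include <stdint.h>
#include <stdio.h>

struct os_power_slider {
        uint16_t size;          /* firmware validates the length */
        uint8_t slider_event;
} __attribute__((packed));

int main(void)
{
        struct os_power_slider args = {
                .size = sizeof(args),
                .slider_event = 1 << 4, /* e.g. BIT(AC_BEST_PERF) */
        };

        printf("length %zu, event 0x%02x\n", sizeof(args), args.slider_event);
        return 0;
}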
 
index 7780705..57bf1a9 100644 (file)
@@ -40,6 +40,7 @@
 /* List of supported CPU ids */
 #define AMD_CPU_ID_RMB                 0x14b5
 #define AMD_CPU_ID_PS                  0x14e8
+#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT        0x1507
 
 #define PMF_MSG_DELAY_MIN_US           50
 #define RESPONSE_REGISTER_LOOP_MAX     20000
@@ -71,7 +72,11 @@ static int amd_pmf_pwr_src_notify_call(struct notifier_block *nb, unsigned long
                        return NOTIFY_DONE;
        }
 
-       amd_pmf_set_sps_power_limits(pmf);
+       if (is_apmf_func_supported(pmf, APMF_FUNC_STATIC_SLIDER_GRANULAR))
+               amd_pmf_set_sps_power_limits(pmf);
+
+       if (is_apmf_func_supported(pmf, APMF_FUNC_OS_POWER_SLIDER_UPDATE))
+               amd_pmf_power_slider_update_event(pmf);
 
        return NOTIFY_OK;
 }
@@ -242,6 +247,7 @@ out_unlock:
 static const struct pci_device_id pmf_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RMB) },
        { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) },
+       { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) },
        { }
 };
 
@@ -295,7 +301,8 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
        int ret;
 
        /* Enable Static Slider */
-       if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+       if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR) ||
+           is_apmf_func_supported(dev, APMF_FUNC_OS_POWER_SLIDER_UPDATE)) {
                amd_pmf_init_sps(dev);
                dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call;
                power_supply_reg_notifier(&dev->pwr_src_notifier);
@@ -333,6 +340,7 @@ static void amd_pmf_deinit_features(struct amd_pmf_dev *dev)
 static const struct acpi_device_id amd_pmf_acpi_ids[] = {
        {"AMDI0100", 0x100},
        {"AMDI0102", 0},
+       {"AMDI0103", 0},
        { }
 };
 MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids);
index 06c30cd..deba88e 100644 (file)
@@ -21,6 +21,7 @@
 #define APMF_FUNC_SBIOS_HEARTBEAT                      4
 #define APMF_FUNC_AUTO_MODE                                    5
 #define APMF_FUNC_SET_FAN_IDX                          7
+#define APMF_FUNC_OS_POWER_SLIDER_UPDATE               8
 #define APMF_FUNC_STATIC_SLIDER_GRANULAR       9
 #define APMF_FUNC_DYN_SLIDER_AC                                11
 #define APMF_FUNC_DYN_SLIDER_DC                                12
 #define GET_STT_LIMIT_APU      0x20
 #define GET_STT_LIMIT_HS2      0x21
 
+/* OS slider update notification */
+#define DC_BEST_PERF           0
+#define DC_BETTER_PERF         1
+#define DC_BATTERY_SAVER       3
+#define AC_BEST_PERF           4
+#define AC_BETTER_PERF         5
+#define AC_BETTER_BATTERY      6
+
 /* Fan Index for Auto Mode */
 #define FAN_INDEX_AUTO         0xFFFFFFFF
 
@@ -193,6 +202,11 @@ struct amd_pmf_static_slider_granular {
        struct apmf_sps_prop_granular prop[POWER_SOURCE_MAX][POWER_MODE_MAX];
 };
 
+struct os_power_slider {
+       u16 size;
+       u8 slider_event;
+} __packed;
+
 struct fan_table_control {
        bool manual;
        unsigned long fan_id;
@@ -383,6 +397,7 @@ int amd_pmf_send_cmd(struct amd_pmf_dev *dev, u8 message, bool get, u32 arg, u32
 int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev);
 int amd_pmf_get_power_source(void);
 int apmf_install_handler(struct amd_pmf_dev *pmf_dev);
+int apmf_os_power_slider_update(struct amd_pmf_dev *dev, u8 flag);
 
 /* SPS Layer */
 int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf);
@@ -393,6 +408,7 @@ void amd_pmf_deinit_sps(struct amd_pmf_dev *dev);
 int apmf_get_static_slider_granular(struct amd_pmf_dev *pdev,
                                    struct apmf_static_slider_granular_output *output);
 bool is_pprof_balanced(struct amd_pmf_dev *pmf);
+int amd_pmf_power_slider_update_event(struct amd_pmf_dev *dev);
 
 
 int apmf_update_fan_idx(struct amd_pmf_dev *pdev, bool manual, u32 idx);
index 445ff05..a70e677 100644 (file)
@@ -174,14 +174,78 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf)
        return mode;
 }
 
+int amd_pmf_power_slider_update_event(struct amd_pmf_dev *dev)
+{
+       u8 flag = 0;
+       int mode;
+       int src;
+
+       mode = amd_pmf_get_pprof_modes(dev);
+       if (mode < 0)
+               return mode;
+
+       src = amd_pmf_get_power_source();
+
+       if (src == POWER_SOURCE_AC) {
+               switch (mode) {
+               case POWER_MODE_PERFORMANCE:
+                       flag |= BIT(AC_BEST_PERF);
+                       break;
+               case POWER_MODE_BALANCED_POWER:
+                       flag |= BIT(AC_BETTER_PERF);
+                       break;
+               case POWER_MODE_POWER_SAVER:
+                       flag |= BIT(AC_BETTER_BATTERY);
+                       break;
+               default:
+                       dev_err(dev->dev, "unsupported platform profile\n");
+                       return -EOPNOTSUPP;
+               }
+
+       } else if (src == POWER_SOURCE_DC) {
+               switch (mode) {
+               case POWER_MODE_PERFORMANCE:
+                       flag |= BIT(DC_BEST_PERF);
+                       break;
+               case POWER_MODE_BALANCED_POWER:
+                       flag |= BIT(DC_BETTER_PERF);
+                       break;
+               case POWER_MODE_POWER_SAVER:
+                       flag |= BIT(DC_BATTERY_SAVER);
+                       break;
+               default:
+                       dev_err(dev->dev, "unsupported platform profile\n");
+                       return -EOPNOTSUPP;
+               }
+       }
+
+       apmf_os_power_slider_update(dev, flag);
+
+       return 0;
+}
+
 static int amd_pmf_profile_set(struct platform_profile_handler *pprof,
                               enum platform_profile_option profile)
 {
        struct amd_pmf_dev *pmf = container_of(pprof, struct amd_pmf_dev, pprof);
+       int ret = 0;
 
        pmf->current_profile = profile;
 
-       return amd_pmf_set_sps_power_limits(pmf);
+       /* Notify EC about the slider position change */
+       if (is_apmf_func_supported(pmf, APMF_FUNC_OS_POWER_SLIDER_UPDATE)) {
+               ret = amd_pmf_power_slider_update_event(pmf);
+               if (ret)
+                       return ret;
+       }
+
+       if (is_apmf_func_supported(pmf, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+               ret = amd_pmf_set_sps_power_limits(pmf);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
 }
 
 int amd_pmf_init_sps(struct amd_pmf_dev *dev)
@@ -189,10 +253,13 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev)
        int err;
 
        dev->current_profile = PLATFORM_PROFILE_BALANCED;
-       amd_pmf_load_defaults_sps(dev);
 
-       /* update SPS balanced power mode thermals */
-       amd_pmf_set_sps_power_limits(dev);
+       if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+               amd_pmf_load_defaults_sps(dev);
+
+               /* update SPS balanced power mode thermals */
+               amd_pmf_set_sps_power_limits(dev);
+       }
 
        dev->pprof.profile_get = amd_pmf_profile_get;
        dev->pprof.profile_set = amd_pmf_profile_set;
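amd_pmf_power_slider_update_event() above encodes the (power source, platform profile) pair as a single bit in the event byte, using the constants the pmf.h hunk adds. A runnable restatement of that mapping:

#include <stdio.h>

#define DC_BEST_PERF            0
#define DC_BETTER_PERF          1
#define DC_BATTERY_SAVER        3
#define AC_BEST_PERF            4
#define AC_BETTER_PERF          5
#define AC_BETTER_BATTERY       6

enum source { POWER_SOURCE_AC, POWER_SOURCE_DC };
enum mode   { PERFORMANCE, BALANCED_POWER, POWER_SAVER };

static unsigned int slider_flag(enum source src, enum mode mode)
{
        static const int ac[] = { AC_BEST_PERF, AC_BETTER_PERF, AC_BETTER_BATTERY };
        static const int dc[] = { DC_BEST_PERF, DC_BETTER_PERF, DC_BATTERY_SAVER };

        return 1u << (src == POWER_SOURCE_AC ? ac[mode] : dc[mode]);
}

int main(void)
{
        /* On AC in power-saver mode -> BIT(AC_BETTER_BATTERY) == 0x40 */
        printf("flag = 0x%02x\n", slider_flag(POWER_SOURCE_AC, POWER_SAVER));
        return 0;
}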
index 1038dfd..8bef66a 100644 (file)
@@ -738,13 +738,23 @@ static ssize_t kbd_rgb_mode_store(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
 {
-       u32 cmd, mode, r, g,  b,  speed;
+       u32 cmd, mode, r, g, b, speed;
        int err;
 
        if (sscanf(buf, "%d %d %d %d %d %d", &cmd, &mode, &r, &g, &b, &speed) != 6)
                return -EINVAL;
 
-       cmd = !!cmd;
+       /* B3 sets the mode, B4 sets it and saves to BIOS */
+       switch (cmd) {
+       case 0:
+               cmd = 0xb3;
+               break;
+       case 1:
+               cmd = 0xb4;
+               break;
+       default:
+               return -EINVAL;
+       }
 
        /* These are the known usable modes across all TUF/ROG */
        if (mode >= 12 || mode == 9)
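
The rewritten handler accepts only cmd values 0 and 1 and translates them into the two EC command bytes. A small userspace sketch of that parse-and-translate step, with a made-up input string standing in for the sysfs buffer:

    #include <stdio.h>

    int main(void)
    {
            const char *buf = "1 0 255 0 0 1"; /* invented sysfs input */
            unsigned int cmd, mode, r, g, b, speed;

            if (sscanf(buf, "%u %u %u %u %u %u",
                       &cmd, &mode, &r, &g, &b, &speed) != 6)
                    return 1;               /* -EINVAL in the driver */

            switch (cmd) {
            case 0: cmd = 0xb3; break;      /* set */
            case 1: cmd = 0xb4; break;      /* set and save to BIOS */
            default: return 1;              /* -EINVAL */
            }

            printf("EC command byte: 0x%02x\n", cmd);
            return 0;
    }
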
index 2750dee..db1e924 100644 (file)
@@ -616,7 +616,8 @@ static int dell_wmi_ddv_hwmon_add(struct dell_wmi_ddv_data *data)
        }
 
        if (index < 2) {
-               ret = -ENODEV;
+               /* Finding no available sensors is not an error */
+               ret = 0;
 
                goto err_release;
        }
@@ -841,13 +842,13 @@ static int dell_wmi_ddv_probe(struct wmi_device *wdev, const void *context)
 
        if (IS_REACHABLE(CONFIG_ACPI_BATTERY)) {
                ret = dell_wmi_ddv_battery_add(data);
-               if (ret < 0 && ret != -ENODEV)
+               if (ret < 0)
                        dev_warn(&wdev->dev, "Unable to register ACPI battery hook: %d\n", ret);
        }
 
        if (IS_REACHABLE(CONFIG_HWMON)) {
                ret = dell_wmi_ddv_hwmon_add(data);
-               if (ret < 0 && ret != -ENODEV)
+               if (ret < 0)
                        dev_warn(&wdev->dev, "Unable to register hwmon interface: %d\n", ret);
        }
 
index 70e5c4c..0ef1c46 100644 (file)
@@ -85,6 +85,8 @@ static const struct key_entry huawei_wmi_keymap[] = {
        { KE_IGNORE, 0x293, { KEY_KBDILLUMTOGGLE } },
        { KE_IGNORE, 0x294, { KEY_KBDILLUMUP } },
        { KE_IGNORE, 0x295, { KEY_KBDILLUMUP } },
+       // Ignore Ambient Light Sensing
+       { KE_KEY,    0x2c1, { KEY_RESERVED } },
        { KE_END,        0 }
 };
 
index d2fee9a..6d9297c 100644 (file)
@@ -1049,6 +1049,11 @@ static const struct key_entry ideapad_keymap[] = {
        { KE_IGNORE,    0x03 | IDEAPAD_WMI_KEY },
        /* Customizable Lenovo Hotkey ("star" with 'S' inside) */
        { KE_KEY,       0x01 | IDEAPAD_WMI_KEY, { KEY_FAVORITES } },
+       { KE_KEY,       0x04 | IDEAPAD_WMI_KEY, { KEY_SELECTIVE_SCREENSHOT } },
+       /* Lenovo Support */
+       { KE_KEY,       0x07 | IDEAPAD_WMI_KEY, { KEY_HELP } },
+       { KE_KEY,       0x0e | IDEAPAD_WMI_KEY, { KEY_PICKUP_PHONE } },
+       { KE_KEY,       0x0f | IDEAPAD_WMI_KEY, { KEY_HANGUP_PHONE } },
        /* Dark mode toggle */
        { KE_KEY,       0x13 | IDEAPAD_WMI_KEY, { KEY_PROG1 } },
        /* Sound profile switch */
index 5632bd3..7457ca2 100644 (file)
@@ -150,6 +150,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go"),
                },
        },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite Dragonfly G2 Notebook PC"),
+               },
+       },
        { }
 };
 
@@ -620,7 +626,7 @@ static bool button_array_present(struct platform_device *device)
 static int intel_hid_probe(struct platform_device *device)
 {
        acpi_handle handle = ACPI_HANDLE(&device->dev);
-       unsigned long long mode;
+       unsigned long long mode, dummy;
        struct intel_hid_priv *priv;
        acpi_status status;
        int err;
@@ -692,18 +698,15 @@ static int intel_hid_probe(struct platform_device *device)
        if (err)
                goto err_remove_notify;
 
-       if (priv->array) {
-               unsigned long long dummy;
+       intel_button_array_enable(&device->dev, true);
 
-               intel_button_array_enable(&device->dev, true);
-
-               /* Call button load method to enable HID power button */
-               if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_BTNL_FN,
-                                              &dummy)) {
-                       dev_warn(&device->dev,
-                                "failed to enable HID power button\n");
-               }
-       }
+       /*
+        * Call button load method to enable HID power button.
+        * Always do this since it activates events on some devices without
+        * a button array too.
+        */
+       if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_BTNL_FN, &dummy))
+               dev_warn(&device->dev, "failed to enable HID power button\n");
 
        device_init_wakeup(&device->dev, true);
        /*
index e6ae826..cefd0d8 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/firmware.h>
 #include <asm/cpu.h>
-#include <asm/microcode_intel.h>
+#include <asm/microcode.h>
 
 #include "ifs.h"
 
@@ -56,12 +56,13 @@ struct metadata_header {
 
 static struct metadata_header *find_meta_data(void *ucode, unsigned int meta_type)
 {
+       struct microcode_header_intel *hdr = &((struct microcode_intel *)ucode)->hdr;
        struct metadata_header *meta_header;
        unsigned long data_size, total_meta;
        unsigned long meta_size = 0;
 
-       data_size = get_datasize(ucode);
-       total_meta = ((struct microcode_intel *)ucode)->hdr.metasize;
+       data_size = intel_microcode_get_datasize(hdr);
+       total_meta = hdr->metasize;
        if (!total_meta)
                return NULL;
 
index 61aeca8..ef4b314 100644 (file)
@@ -260,7 +260,7 @@ static_assert(ARRAY_SIZE(skl_int3472_regulator_map_supplies) ==
  * This DMI table contains the name of the second sensor. This is used to add
  * entries for the second sensor to the supply_map.
  */
-const struct dmi_system_id skl_int3472_regulator_second_sensor[] = {
+static const struct dmi_system_id skl_int3472_regulator_second_sensor[] = {
        {
                /* Lenovo Miix 510-12IKB */
                .matches = {
index 5a36b3f..84c175b 100644 (file)
@@ -1123,7 +1123,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = {
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      icl_core_init),
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          tgl_core_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         tgl_core_init),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         tgl_core_init),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      tgl_core_init),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           adl_core_init),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        tgl_core_init),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          adl_core_init),
index 1f59ac5..08df949 100644 (file)
@@ -335,8 +335,8 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
 
                node = dev_to_node(&_pci_dev->dev);
                if (node == NUMA_NO_NODE) {
-                       pr_info("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
-                               cpu, bus_no, dev, fn);
+                       pr_info_once("Failed to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
+                                    cpu, bus_no, dev, fn);
                        continue;
                }
 
@@ -720,7 +720,7 @@ static struct miscdevice isst_if_char_driver = {
 
 static const struct x86_cpu_id hpm_cpu_ids[] = {
        X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,     NULL),
-       X86_MATCH_INTEL_FAM6_MODEL(SIERRAFOREST_X,      NULL),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,    NULL),
        {}
 };
 
index 9c606ee..d1fd6e6 100644 (file)
@@ -356,9 +356,7 @@ static int intel_vsec_tpmi_init(struct auxiliary_device *auxdev)
                if (!pfs_start)
                        pfs_start = res_start;
 
-               pfs->pfs_header.cap_offset *= TPMI_CAP_OFFSET_UNIT;
-
-               pfs->vsec_offset = pfs_start + pfs->pfs_header.cap_offset;
+               pfs->vsec_offset = pfs_start + pfs->pfs_header.cap_offset * TPMI_CAP_OFFSET_UNIT;
 
                /*
                 * Process TPMI_INFO to get PCI device to CPU package ID.
index 4167618..e1fbc35 100644 (file)
@@ -24,6 +24,10 @@ static bool ec_trigger __read_mostly;
 module_param(ec_trigger, bool, 0444);
 MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events");
 
+static bool force;
+module_param(force, bool, 0444);
+MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type");
+
 static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = {
        {
                /* Lenovo Yoga 7 14ARB7 */
@@ -32,6 +36,27 @@ static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "82QF"),
                },
        },
+       {
+               /* Lenovo Yoga 7 14ACN6 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82N7"),
+               },
+       },
+       { }
+};
+
+static const struct dmi_system_id allowed_chassis_types_dmi_table[] = {
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */),
+               },
+       },
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */),
+               },
+       },
        { }
 };
 
@@ -111,6 +136,13 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx)
        struct input_dev *input_dev;
        int err;
 
+       if (!dmi_check_system(allowed_chassis_types_dmi_table)) {
+               if (force)
+                       dev_info(&wdev->dev, "Force loading Lenovo YMC support\n");
+               else
+                       return -ENODEV;
+       }
+
        ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table);
 
        priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL);
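
The new probe gate combines a DMI chassis-type allow-list with a force module parameter. A compact sketch of that gating logic; the chassis strings follow the table above ("31" convertible, "32" detachable) and the inputs are invented:

    #include <stdio.h>
    #include <string.h>

    static int force = 1;   /* mirrors the new module parameter */

    static int chassis_allowed(const char *type)
    {
            return !strcmp(type, "31") || !strcmp(type, "32");
    }

    int main(void)
    {
            const char *chassis = "10";     /* invented: a plain notebook */

            if (!chassis_allowed(chassis)) {
                    if (!force)
                            return 1;       /* -ENODEV in the driver */
                    printf("force loading despite chassis type %s\n", chassis);
            }
            printf("probing\n");
            return 0;
    }
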
index 67367f0..7d33977 100644 (file)
 #define MLXPLAT_CPLD_LPC_REG_PWM_CONTROL_OFFSET        0x37
 #define MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET       0x3a
 #define MLXPLAT_CPLD_LPC_REG_AGGR_MASK_OFFSET  0x3b
-#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET       0x3c
-#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET       0x3d
-#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET       0x3e
-#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET       0x3f
 #define MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET     0x40
 #define MLXPLAT_CPLD_LPC_REG_AGGRLO_MASK_OFFSET        0x41
 #define MLXPLAT_CPLD_LPC_REG_AGGRCO_OFFSET     0x42
 #define MLXPLAT_CPLD_LPC_REG_LC_SD_EVENT_OFFSET        0xaa
 #define MLXPLAT_CPLD_LPC_REG_LC_SD_MASK_OFFSET 0xab
 #define MLXPLAT_CPLD_LPC_REG_LC_PWR_ON         0xb2
+#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET       0xb6
+#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET       0xb7
+#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET       0xb8
+#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET       0xb9
 #define MLXPLAT_CPLD_LPC_REG_GP4_RO_OFFSET     0xc2
 #define MLXPLAT_CPLD_LPC_REG_SPI_CHNL_SELECT   0xc3
 #define MLXPLAT_CPLD_LPC_REG_WD_CLEAR_OFFSET   0xc7
                                         MLXPLAT_CPLD_AGGR_MASK_LC_SDWN)
 #define MLXPLAT_CPLD_LOW_AGGR_MASK_LOW 0xc1
 #define MLXPLAT_CPLD_LOW_AGGR_MASK_ASIC2       BIT(2)
-#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT     BIT(4)
+#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT     GENMASK(5, 4)
 #define MLXPLAT_CPLD_LOW_AGGR_MASK_I2C BIT(6)
 #define MLXPLAT_CPLD_PSU_MASK          GENMASK(1, 0)
 #define MLXPLAT_CPLD_PWR_MASK          GENMASK(1, 0)
 #define MLXPLAT_CPLD_GWP_MASK          GENMASK(0, 0)
 #define MLXPLAT_CPLD_EROT_MASK         GENMASK(1, 0)
 #define MLXPLAT_CPLD_PWR_BUTTON_MASK   BIT(0)
-#define MLXPLAT_CPLD_LATCH_RST_MASK    BIT(5)
+#define MLXPLAT_CPLD_LATCH_RST_MASK    BIT(6)
 #define MLXPLAT_CPLD_THERMAL1_PDB_MASK BIT(3)
 #define MLXPLAT_CPLD_THERMAL2_PDB_MASK BIT(4)
 #define MLXPLAT_CPLD_INTRUSION_MASK    BIT(6)
@@ -2356,7 +2356,7 @@ mlxplat_mlxcpld_l1_switch_pwr_events_handler(void *handle, enum mlxreg_hotplug_k
                                             u8 action)
 {
        dev_info(&mlxplat_dev->dev, "System shutdown due to short press of power button");
-       kernel_halt();
+       kernel_power_off();
        return 0;
 }
 
@@ -2475,7 +2475,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = {
                .reg = MLXPLAT_CPLD_LPC_REG_PWRB_OFFSET,
                .mask = MLXPLAT_CPLD_PWR_BUTTON_MASK,
                .count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_pwr_events_items_data),
-               .inversed = 0,
+               .inversed = 1,
                .health = false,
        },
        {
@@ -2484,7 +2484,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = {
                .reg = MLXPLAT_CPLD_LPC_REG_BRD_OFFSET,
                .mask = MLXPLAT_CPLD_L1_CHA_HEALTH_MASK,
                .count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_health_events_items_data),
-               .inversed = 0,
+               .inversed = 1,
                .health = false,
                .ind = 8,
        },
@@ -3677,7 +3677,7 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_regs_io_data[] = {
        {
                .label = "latch_reset",
                .reg = MLXPLAT_CPLD_LPC_REG_GP1_OFFSET,
-               .mask = GENMASK(7, 0) & ~BIT(5),
+               .mask = GENMASK(7, 0) & ~BIT(6),
                .mode = 0200,
        },
        {
@@ -6238,8 +6238,6 @@ static void mlxplat_i2c_mux_topolgy_exit(struct mlxplat_priv *priv)
                if (priv->pdev_mux[i])
                        platform_device_unregister(priv->pdev_mux[i]);
        }
-
-       mlxplat_post_exit();
 }
 
 static int mlxplat_i2c_main_complition_notify(void *handle, int id)
@@ -6369,6 +6367,7 @@ static void __exit mlxplat_exit(void)
                pm_power_off = NULL;
        mlxplat_pre_exit(priv);
        mlxplat_i2c_main_exit(priv);
+       mlxplat_post_exit();
 }
 module_exit(mlxplat_exit);
 
index ff93986..f26a312 100644 (file)
 #include <linux/seq_file.h>
 #include <linux/string.h>
 
-static const char *const SM_ECO_NAME       = "eco";
-static const char *const SM_COMFORT_NAME   = "comfort";
-static const char *const SM_SPORT_NAME     = "sport";
-static const char *const SM_TURBO_NAME     = "turbo";
-
-static const char *const FM_AUTO_NAME     = "auto";
-static const char *const FM_SILENT_NAME   = "silent";
-static const char *const FM_BASIC_NAME    = "basic";
-static const char *const FM_ADVANCED_NAME = "advanced";
+#define SM_ECO_NAME            "eco"
+#define SM_COMFORT_NAME                "comfort"
+#define SM_SPORT_NAME          "sport"
+#define SM_TURBO_NAME          "turbo"
+
+#define FM_AUTO_NAME           "auto"
+#define FM_SILENT_NAME         "silent"
+#define FM_BASIC_NAME          "basic"
+#define FM_ADVANCED_NAME       "advanced"
 
 static const char * const ALLOWED_FW_0[] __initconst = {
        "14C1EMS1.012",
index 6b18ec5..f4c6c36 100644 (file)
@@ -208,7 +208,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask)
                return -EINVAL;
 
        if (quirks->ec_read_only)
-               return -EOPNOTSUPP;
+               return 0;
 
        /* read current device state */
        result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata);
@@ -838,15 +838,15 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str,
 static void msi_init_rfkill(struct work_struct *ignored)
 {
        if (rfk_wlan) {
-               rfkill_set_sw_state(rfk_wlan, !wlan_s);
+               msi_rfkill_set_state(rfk_wlan, !wlan_s);
                rfkill_wlan_set(NULL, !wlan_s);
        }
        if (rfk_bluetooth) {
-               rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s);
+               msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s);
                rfkill_bluetooth_set(NULL, !bluetooth_s);
        }
        if (rfk_threeg) {
-               rfkill_set_sw_state(rfk_threeg, !threeg_s);
+               msi_rfkill_set_state(rfk_threeg, !threeg_s);
                rfkill_threeg_set(NULL, !threeg_s);
        }
 }
index f3dcbdd..8158e3c 100644 (file)
@@ -21,6 +21,7 @@
 #define IRQ_RESOURCE_NONE      0
 #define IRQ_RESOURCE_GPIO      1
 #define IRQ_RESOURCE_APIC      2
+#define IRQ_RESOURCE_AUTO      3
 
 enum smi_bus_type {
        SMI_I2C,
@@ -52,6 +53,18 @@ static int smi_get_irq(struct platform_device *pdev, struct acpi_device *adev,
        int ret;
 
        switch (inst->flags & IRQ_RESOURCE_TYPE) {
+       case IRQ_RESOURCE_AUTO:
+               ret = acpi_dev_gpio_irq_get(adev, inst->irq_idx);
+               if (ret > 0) {
+                       dev_dbg(&pdev->dev, "Using gpio irq\n");
+                       break;
+               }
+               ret = platform_get_irq(pdev, inst->irq_idx);
+               if (ret > 0) {
+                       dev_dbg(&pdev->dev, "Using platform irq\n");
+                       break;
+               }
+               break;
        case IRQ_RESOURCE_GPIO:
                ret = acpi_dev_gpio_irq_get(adev, inst->irq_idx);
                break;
@@ -307,10 +320,23 @@ static const struct smi_node int3515_data = {
 
 static const struct smi_node cs35l41_hda = {
        .instances = {
-               { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
-               { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
-               { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
-               { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
+               { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+               {}
+       },
+       .bus_type = SMI_AUTO_DETECT,
+};
+
+static const struct smi_node cs35l56_hda = {
+       .instances = {
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+               /* a 5th entry is an alias address, not a real device */
+               { "cs35l56-hda_dummy_dev" },
                {}
        },
        .bus_type = SMI_AUTO_DETECT,
@@ -324,6 +350,7 @@ static const struct acpi_device_id smi_acpi_ids[] = {
        { "BSG1160", (unsigned long)&bsg1160_data },
        { "BSG2150", (unsigned long)&bsg2150_data },
        { "CSC3551", (unsigned long)&cs35l41_hda },
+       { "CSC3556", (unsigned long)&cs35l56_hda },
        { "INT3515", (unsigned long)&int3515_data },
        /* Non-conforming _HID for Cirrus Logic already released */
        { "CLSA0100", (unsigned long)&cs35l41_hda },
index 52d1ce8..7934688 100644 (file)
@@ -719,12 +719,12 @@ static ssize_t cert_to_password_store(struct kobject *kobj,
        /* Format: 'Password,Signature' */
        auth_str = kasprintf(GFP_KERNEL, "%s,%s", passwd, setting->signature);
        if (!auth_str) {
-               kfree(passwd);
+               kfree_sensitive(passwd);
                return -ENOMEM;
        }
        ret = tlmi_simple_call(LENOVO_CERT_TO_PASSWORD_GUID, auth_str);
        kfree(auth_str);
-       kfree(passwd);
+       kfree_sensitive(passwd);
 
        return ret ?: count;
 }
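
kfree_sensitive() zeroes an allocation before freeing it, so the password cannot linger in freed memory. A rough userspace analogue, assuming no libc helper is available; the volatile walk stands in for the kernel's memzero_explicit() and keeps the compiler from eliding the zeroing:

    #include <stdlib.h>
    #include <string.h>

    static void free_sensitive(void *p, size_t len)
    {
            volatile unsigned char *v = p;

            while (len--)
                    *v++ = 0;       /* not optimized away: v is volatile */
            free(p);
    }

    int main(void)
    {
            char *passwd = strdup("hunter2");       /* invented secret */

            if (!passwd)
                    return 1;
            /* ... build the auth string, make the call ... */
            free_sensitive(passwd, strlen(passwd) + 1);
            return 0;
    }
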
index 187018f..ad46041 100644 (file)
@@ -315,17 +315,12 @@ struct ibm_init_struct {
 /* DMI Quirks */
 struct quirk_entry {
        bool btusb_bug;
-       u32 s2idle_bug_mmio;
 };
 
 static struct quirk_entry quirk_btusb_bug = {
        .btusb_bug = true,
 };
 
-static struct quirk_entry quirk_s2idle_bug = {
-       .s2idle_bug_mmio = 0xfed80380,
-};
-
 static struct {
        u32 bluetooth:1;
        u32 hotkey:1;
@@ -4422,136 +4417,9 @@ static const struct dmi_system_id fwbug_list[] __initconst = {
                        DMI_MATCH(DMI_BOARD_NAME, "20MV"),
                },
        },
-       {
-               .ident = "L14 Gen2 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20X5"),
-               }
-       },
-       {
-               .ident = "T14s Gen2 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20XF"),
-               }
-       },
-       {
-               .ident = "X13 Gen2 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20XH"),
-               }
-       },
-       {
-               .ident = "T14 Gen2 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20XK"),
-               }
-       },
-       {
-               .ident = "T14 Gen1 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20UD"),
-               }
-       },
-       {
-               .ident = "T14 Gen1 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20UE"),
-               }
-       },
-       {
-               .ident = "T14s Gen1 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20UH"),
-               }
-       },
-       {
-               .ident = "T14s Gen1 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20UJ"),
-               }
-       },
-       {
-               .ident = "P14s Gen1 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "20Y1"),
-               }
-       },
-       {
-               .ident = "P14s Gen2 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "21A0"),
-               }
-       },
-       {
-               .ident = "P14s Gen2 AMD",
-               .driver_data = &quirk_s2idle_bug,
-               .matches = {
-                       DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "21A1"),
-               }
-       },
        {}
 };
 
-#ifdef CONFIG_SUSPEND
-/*
- * Lenovo laptops from a variety of generations run a SMI handler during the D3->D0
- * transition that occurs specifically when exiting suspend to idle which can cause
- * large delays during resume when the IOMMU translation layer is enabled (the default
- * behavior) for NVME devices:
- *
- * To avoid this firmware problem, skip the SMI handler on these machines before the
- * D0 transition occurs.
- */
-static void thinkpad_acpi_amd_s2idle_restore(void)
-{
-       struct resource *res;
-       void __iomem *addr;
-       u8 val;
-
-       res = request_mem_region_muxed(tp_features.quirks->s2idle_bug_mmio, 1,
-                                       "thinkpad_acpi_pm80");
-       if (!res)
-               return;
-
-       addr = ioremap(tp_features.quirks->s2idle_bug_mmio, 1);
-       if (!addr)
-               goto cleanup_resource;
-
-       val = ioread8(addr);
-       iowrite8(val & ~BIT(0), addr);
-
-       iounmap(addr);
-cleanup_resource:
-       release_resource(res);
-       kfree(res);
-}
-
-static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = {
-       .restore = thinkpad_acpi_amd_s2idle_restore,
-};
-#endif
-
 static const struct pci_device_id fwbug_cards_ids[] __initconst = {
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24F3) },
        { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x24FD) },
@@ -11668,10 +11536,6 @@ static void thinkpad_acpi_module_exit(void)
 
        tpacpi_lifecycle = TPACPI_LIFE_EXITING;
 
-#ifdef CONFIG_SUSPEND
-       if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio)
-               acpi_unregister_lps0_dev(&thinkpad_acpi_s2idle_dev_ops);
-#endif
        if (tpacpi_hwmon)
                hwmon_device_unregister(tpacpi_hwmon);
        if (tp_features.sensors_pdrv_registered)
@@ -11861,13 +11725,6 @@ static int __init thinkpad_acpi_module_init(void)
                tp_features.input_device_registered = 1;
        }
 
-#ifdef CONFIG_SUSPEND
-       if (tp_features.quirks && tp_features.quirks->s2idle_bug_mmio) {
-               if (!acpi_register_lps0_dev(&thinkpad_acpi_s2idle_dev_ops))
-                       pr_info("Using s2idle quirk to avoid %s platform firmware bug\n",
-                               (dmi_id && dmi_id->ident) ? dmi_id->ident : "");
-       }
-#endif
        return 0;
 }
 
index 68e66b6..f9301a9 100644 (file)
@@ -26,6 +26,22 @@ struct ts_dmi_data {
 
 /* NOTE: Please keep all entries sorted alphabetically */
 
+static const struct property_entry archos_101_cesium_educ_props[] = {
+       PROPERTY_ENTRY_U32("touchscreen-size-x", 1850),
+       PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
+       PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
+       PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
+       PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+       PROPERTY_ENTRY_BOOL("silead,home-button"),
+       PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-archos-101-cesium-educ.fw"),
+       { }
+};
+
+static const struct ts_dmi_data archos_101_cesium_educ_data = {
+       .acpi_name      = "MSSL1680:00",
+       .properties     = archos_101_cesium_educ_props,
+};
+
 static const struct property_entry chuwi_hi8_props[] = {
        PROPERTY_ENTRY_U32("touchscreen-size-x", 1665),
        PROPERTY_ENTRY_U32("touchscreen-size-y", 1140),
@@ -1048,6 +1064,13 @@ static const struct ts_dmi_data vinga_twizzle_j116_data = {
 /* NOTE: Please keep this table sorted alphabetically */
 const struct dmi_system_id touchscreen_dmi_table[] = {
        {
+               /* Archos 101 Cesium Educ */
+               .driver_data = (void *)&archos_101_cesium_educ_data,
+               .matches = {
+                       DMI_MATCH(DMI_PRODUCT_NAME, "ARCHOS 101 Cesium Educ"),
+               },
+       },
+       {
                /* Chuwi Hi8 */
                .driver_data = (void *)&chuwi_hi8_data,
                .matches = {
index 5b95d7a..a78ddd8 100644 (file)
@@ -136,6 +136,16 @@ static acpi_status find_guid(const char *guid_string, struct wmi_block **out)
        return AE_NOT_FOUND;
 }
 
+static bool guid_parse_and_compare(const char *string, const guid_t *guid)
+{
+       guid_t guid_input;
+
+       if (guid_parse(string, &guid_input))
+               return false;
+
+       return guid_equal(&guid_input, guid);
+}
+
 static const void *find_guid_context(struct wmi_block *wblock,
                                     struct wmi_driver *wdriver)
 {
@@ -146,11 +156,7 @@ static const void *find_guid_context(struct wmi_block *wblock,
                return NULL;
 
        while (*id->guid_string) {
-               guid_t guid_input;
-
-               if (guid_parse(id->guid_string, &guid_input))
-                       continue;
-               if (guid_equal(&wblock->gblock.guid, &guid_input))
+               if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid))
                        return id->context;
                id++;
        }
@@ -895,11 +901,7 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver)
                return 0;
 
        while (*id->guid_string) {
-               guid_t driver_guid;
-
-               if (WARN_ON(guid_parse(id->guid_string, &driver_guid)))
-                       continue;
-               if (guid_equal(&driver_guid, &wblock->gblock.guid))
+               if (guid_parse_and_compare(id->guid_string, &wblock->gblock.guid))
                        return 1;
 
                id++;
@@ -1239,11 +1241,7 @@ static bool guid_already_parsed_for_legacy(struct acpi_device *device, const gui
        list_for_each_entry(wblock, &wmi_block_list, list) {
                /* skip warning and register if we know the driver will use struct wmi_driver */
                for (int i = 0; allow_duplicates[i] != NULL; i++) {
-                       guid_t tmp;
-
-                       if (guid_parse(allow_duplicates[i], &tmp))
-                               continue;
-                       if (guid_equal(&tmp, guid))
+                       if (guid_parse_and_compare(allow_duplicates[i], guid))
                                return false;
                }
                if (guid_equal(&wblock->gblock.guid, guid)) {
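
The refactor folds the repeated parse-then-compare sequence into one guid_parse_and_compare() helper. A simplified userspace sketch of the same consolidation; the parser below is a plain byte-wise stand-in for the kernel's guid_parse() and ignores the mixed-endian layout of a real guid_t:

    #include <stdio.h>
    #include <string.h>

    struct guid { unsigned char b[16]; };

    static int guid_parse(const char *s, struct guid *g)
    {
            unsigned int v[16];

            if (sscanf(s, "%2x%2x%2x%2x-%2x%2x-%2x%2x-%2x%2x-%2x%2x%2x%2x%2x%2x",
                       &v[0], &v[1], &v[2],  &v[3],  &v[4],  &v[5],  &v[6],  &v[7],
                       &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15]) != 16)
                    return -1;
            for (int i = 0; i < 16; i++)
                    g->b[i] = (unsigned char)v[i];
            return 0;
    }

    static int guid_parse_and_compare(const char *s, const struct guid *guid)
    {
            struct guid tmp;

            if (guid_parse(s, &tmp))
                    return 0;       /* an unparsable string never matches */
            return !memcmp(&tmp, guid, sizeof(tmp));
    }

    int main(void)
    {
            struct guid g = { { 0xde, 0xad } };     /* rest zero, illustrative */

            printf("%d\n", guid_parse_and_compare(
                    "dead0000-0000-0000-0000-000000000000", &g));
            return 0;
    }
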
index 38928ff..6ab272c 100644 (file)
@@ -254,6 +254,9 @@ static int __init pnpacpi_add_device(struct acpi_device *device)
        else
                strncpy(dev->name, acpi_device_bid(device), sizeof(dev->name));
 
+       /* Handle possible string truncation */
+       dev->name[sizeof(dev->name) - 1] = '\0';
+
        if (dev->active)
                pnpacpi_parse_allocated_resource(dev);
 
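
strncpy() leaves the destination unterminated whenever the source is at least as long as the buffer, which is exactly what the added assignment guards against. A tiny demonstration with deliberately small sizes:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char name[8];

            strncpy(name, "a-rather-long-device-id", sizeof(name));
            name[sizeof(name) - 1] = '\0';  /* the guard added above */

            printf("%s\n", name);           /* prints "a-rathe" */
            return 0;
    }
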
index 5231f6d..a081f17 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/powercap.h>
 #include <linux/scmi_protocol.h>
+#include <linux/slab.h>
 
 #define to_scmi_powercap_zone(z)               \
        container_of(z, struct scmi_powercap_zone, zone)
@@ -19,6 +20,8 @@
 static const struct scmi_powercap_proto_ops *powercap_ops;
 
 struct scmi_powercap_zone {
+       bool registered;
+       bool invalid;
        unsigned int height;
        struct device *dev;
        struct scmi_protocol_handle *ph;
@@ -32,6 +35,7 @@ struct scmi_powercap_root {
        unsigned int num_zones;
        struct scmi_powercap_zone *spzones;
        struct list_head *registered_zones;
+       struct list_head scmi_zones;
 };
 
 static struct powercap_control_type *scmi_top_pcntrl;
@@ -271,12 +275,6 @@ static void scmi_powercap_unregister_all_zones(struct scmi_powercap_root *pr)
        }
 }
 
-static inline bool
-scmi_powercap_is_zone_registered(struct scmi_powercap_zone *spz)
-{
-       return !list_empty(&spz->node);
-}
-
 static inline unsigned int
 scmi_powercap_get_zone_height(struct scmi_powercap_zone *spz)
 {
@@ -295,11 +293,46 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
        return &spz->spzones[spz->info->parent_id];
 }
 
+static int scmi_powercap_register_zone(struct scmi_powercap_root *pr,
+                                      struct scmi_powercap_zone *spz,
+                                      struct scmi_powercap_zone *parent)
+{
+       int ret = 0;
+       struct powercap_zone *z;
+
+       if (spz->invalid) {
+               list_del(&spz->node);
+               return -EINVAL;
+       }
+
+       z = powercap_register_zone(&spz->zone, scmi_top_pcntrl, spz->info->name,
+                                  parent ? &parent->zone : NULL,
+                                  &zone_ops, 1, &constraint_ops);
+       if (!IS_ERR(z)) {
+               spz->height = scmi_powercap_get_zone_height(spz);
+               spz->registered = true;
+               list_move(&spz->node, &pr->registered_zones[spz->height]);
+               dev_dbg(spz->dev, "Registered node %s - parent %s - height:%d\n",
+                       spz->info->name, parent ? parent->info->name : "ROOT",
+                       spz->height);
+       } else {
+               list_del(&spz->node);
+               ret = PTR_ERR(z);
+               dev_err(spz->dev,
+                       "Error registering node:%s - parent:%s - h:%d - ret:%d\n",
+                       spz->info->name,
+                       parent ? parent->info->name : "ROOT",
+                       spz->height, ret);
+       }
+
+       return ret;
+}
+
 /**
- * scmi_powercap_register_zone  - Register an SCMI powercap zone recursively
+ * scmi_zones_register - Register SCMI powercap zones starting from parent zones
  *
+ * @dev: A reference to the SCMI device
  * @pr: A reference to the root powercap zones descriptors
- * @spz: A reference to the SCMI powercap zone to register
  *
  * When registering SCMI powercap zones with the powercap framework we should
  * take care to always register zones starting from the root ones and to
@@ -309,10 +342,10 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
  * zones provided by the SCMI platform firmware is built to comply with such
  * requirement.
  *
- * This function, given an SCMI powercap zone to register, takes care to walk
- * the SCMI powercap zones tree up to the root looking recursively for
- * unregistered parent zones before registering the provided zone; at the same
- * time each registered zone height in such a tree is accounted for and each
+ * This function, given the set of SCMI powercap zones to register, takes care
+ * to walk the SCMI powercap zones trees up to the root registering any
+ * unregistered parent zone before registering the child zones; at the same
+ * time each registered-zone height in such a tree is accounted for and each
  * zone, once registered, is stored in the @registered_zones array that is
  * indexed by zone height: this way it will be trivial, at unregister time, to walk
  * the @registered_zones array backward and unregister all the zones starting
@@ -330,57 +363,55 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
  *
  * Return: 0 on Success
  */
-static int scmi_powercap_register_zone(struct scmi_powercap_root *pr,
-                                      struct scmi_powercap_zone *spz)
+static int scmi_zones_register(struct device *dev,
+                              struct scmi_powercap_root *pr)
 {
        int ret = 0;
-       struct scmi_powercap_zone *parent;
-
-       if (!spz->info)
-               return ret;
+       unsigned int sp = 0, reg_zones = 0;
+       struct scmi_powercap_zone *spz, **zones_stack;
 
-       parent = scmi_powercap_get_parent_zone(spz);
-       if (parent && !scmi_powercap_is_zone_registered(parent)) {
-               /*
-                * Bail out if a parent domain was marked as unsupported:
-                * only domains participating as leaves can be skipped.
-                */
-               if (!parent->info)
-                       return -ENODEV;
+       zones_stack = kcalloc(pr->num_zones, sizeof(spz), GFP_KERNEL);
+       if (!zones_stack)
+               return -ENOMEM;
 
-               ret = scmi_powercap_register_zone(pr, parent);
-               if (ret)
-                       return ret;
-       }
+       spz = list_first_entry_or_null(&pr->scmi_zones,
+                                      struct scmi_powercap_zone, node);
+       while (spz) {
+               struct scmi_powercap_zone *parent;
 
-       if (!scmi_powercap_is_zone_registered(spz)) {
-               struct powercap_zone *z;
-
-               z = powercap_register_zone(&spz->zone,
-                                          scmi_top_pcntrl,
-                                          spz->info->name,
-                                          parent ? &parent->zone : NULL,
-                                          &zone_ops, 1, &constraint_ops);
-               if (!IS_ERR(z)) {
-                       spz->height = scmi_powercap_get_zone_height(spz);
-                       list_add(&spz->node,
-                                &pr->registered_zones[spz->height]);
-                       dev_dbg(spz->dev,
-                               "Registered node %s - parent %s - height:%d\n",
-                               spz->info->name,
-                               parent ? parent->info->name : "ROOT",
-                               spz->height);
-                       ret = 0;
+               parent = scmi_powercap_get_parent_zone(spz);
+               if (parent && !parent->registered) {
+                       zones_stack[sp++] = spz;
+                       spz = parent;
                } else {
-                       ret = PTR_ERR(z);
-                       dev_err(spz->dev,
-                               "Error registering node:%s - parent:%s - h:%d - ret:%d\n",
-                                spz->info->name,
-                                parent ? parent->info->name : "ROOT",
-                                spz->height, ret);
+                       ret = scmi_powercap_register_zone(pr, spz, parent);
+                       if (!ret) {
+                               reg_zones++;
+                       } else if (sp) {
+                               /* Failed to register a non-leaf zone.
+                                * Bail out.
+                                */
+                               dev_err(dev,
+                                       "Failed to register non-leaf zone - ret:%d\n",
+                                       ret);
+                               scmi_powercap_unregister_all_zones(pr);
+                               reg_zones = 0;
+                               goto out;
+                       }
+                       /* Pick next zone to process */
+                       if (sp)
+                               spz = zones_stack[--sp];
+                       else
+                               spz = list_first_entry_or_null(&pr->scmi_zones,
+                                                              struct scmi_powercap_zone,
+                                                              node);
                }
        }
 
+out:
+       kfree(zones_stack);
+       dev_info(dev, "Registered %d SCMI Powercap domains!\n", reg_zones);
+
        return ret;
 }
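
The kernel-doc above describes the new iterative walk: before registering a zone, push it on an explicit stack and climb to its first unregistered ancestor, so parents are always registered before children without recursion. A self-contained sketch of that traversal over an invented five-zone tree:

    #include <stdio.h>

    #define NZONES 5

    struct zone {
            const char *name;
            int parent;             /* index into zones[], -1 for a root */
            int registered;
    };

    int main(void)
    {
            struct zone zones[NZONES] = {
                    { "leaf-a", 2, 0 }, { "leaf-b", 2, 0 },
                    { "mid",    4, 0 }, { "leaf-c", 4, 0 }, { "root", -1, 0 },
            };
            int stack[NZONES], sp = 0;

            for (int i = 0; i < NZONES; i++) {
                    int z = i;

                    while (!zones[z].registered) {
                            int p = zones[z].parent;

                            if (p >= 0 && !zones[p].registered) {
                                    stack[sp++] = z;        /* revisit z later */
                                    z = p;                  /* climb first */
                                    continue;
                            }
                            zones[z].registered = 1;        /* parent-first */
                            printf("registered %s\n", zones[z].name);
                            if (sp)
                                    z = stack[--sp];        /* resume child */
                    }
            }
            return 0;
    }
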
 
@@ -424,6 +455,8 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
        if (!pr->registered_zones)
                return -ENOMEM;
 
+       INIT_LIST_HEAD(&pr->scmi_zones);
+
        for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) {
                /*
                 * Powercap domains are validated by the protocol layer, i.e.
@@ -438,6 +471,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
                INIT_LIST_HEAD(&spz->node);
                INIT_LIST_HEAD(&pr->registered_zones[i]);
 
+               list_add_tail(&spz->node, &pr->scmi_zones);
                /*
                 * Forcibly skip powercap domains using an abstract scale.
                 * Note that only leaf domains can be skipped, so this could
@@ -448,7 +482,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
                        dev_warn(dev,
                                 "Abstract power scale not supported. Skip %s.\n",
                                 spz->info->name);
-                       spz->info = NULL;
+                       spz->invalid = true;
                        continue;
                }
        }
@@ -457,21 +491,12 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
         * Scan array of retrieved SCMI powercap domains and register them
         * iteratively, starting from the root domains.
         */
-       for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) {
-               ret = scmi_powercap_register_zone(pr, spz);
-               if (ret) {
-                       dev_err(dev,
-                               "Failed to register powercap zone %s - ret:%d\n",
-                               spz->info->name, ret);
-                       scmi_powercap_unregister_all_zones(pr);
-                       return ret;
-               }
-       }
+       ret = scmi_zones_register(dev, pr);
+       if (ret)
+               return ret;
 
        dev_set_drvdata(dev, pr);
 
-       dev_info(dev, "Registered %d SCMI Powercap domains !\n", pr->num_zones);
-
        return ret;
 }
 
index 4e646e5..5c2e6d5 100644 (file)
@@ -818,7 +818,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
                return -EINVAL;
 
        ra.reg = rd->regs[rpi->id];
-       if (!ra.reg)
+       if (!ra.reg.val)
                return -EINVAL;
 
        /* non-hardware data are collected by the polling thread */
@@ -830,7 +830,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
        ra.mask = rpi->mask;
 
        if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
-               pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg, rd->rp->name, rd->name);
+               pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
                return -EIO;
        }
 
@@ -920,7 +920,7 @@ static int rapl_check_unit_core(struct rapl_domain *rd)
        ra.mask = ~0;
        if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
                pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
-                       ra.reg, rd->rp->name, rd->name);
+                       ra.reg.val, rd->rp->name, rd->name);
                return -ENODEV;
        }
 
@@ -948,7 +948,7 @@ static int rapl_check_unit_atom(struct rapl_domain *rd)
        ra.mask = ~0;
        if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
                pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
-                       ra.reg, rd->rp->name, rd->name);
+                       ra.reg.val, rd->rp->name, rd->name);
                return -ENODEV;
        }
 
@@ -1135,7 +1135,7 @@ static int rapl_check_unit_tpmi(struct rapl_domain *rd)
        ra.mask = ~0;
        if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
                pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
-                       ra.reg, rd->rp->name, rd->name);
+                       ra.reg.val, rd->rp->name, rd->name);
                return -ENODEV;
        }
 
@@ -1250,7 +1250,7 @@ static const struct x86_cpu_id rapl_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE,          &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE,           &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L,         &rapl_defaults_core),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N,         &rapl_defaults_core),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT,      &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE,          &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P,        &rapl_defaults_core),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S,        &rapl_defaults_core),
@@ -1411,8 +1411,8 @@ static int rapl_get_domain_unit(struct rapl_domain *rd)
        struct rapl_defaults *defaults = get_defaults(rd->rp);
        int ret;
 
-       if (!rd->regs[RAPL_DOMAIN_REG_UNIT]) {
-               if (!rd->rp->priv->reg_unit) {
+       if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
+               if (!rd->rp->priv->reg_unit.val) {
                        pr_err("No valid Unit register found\n");
                        return -ENODEV;
                }
@@ -1485,7 +1485,7 @@ static int rapl_detect_domains(struct rapl_package *rp)
        }
        pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
 
-       rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
+       rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain),
                              GFP_KERNEL);
        if (!rp->domains)
                return -ENOMEM;
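
These hunks follow a rework that turns the driver's register handle from a bare u64 into a union with one view per interface (.msr, .mmio) plus a raw .val used for is-it-populated checks such as !ra.reg.val above. A reconstructed sketch of such a union; member order and types are illustrative, not copied from the kernel header, and the .val read relies on the type-punning behavior compilers define for unions:

    #include <stdio.h>

    union rapl_reg {
            unsigned long long val; /* raw view for "is it set" tests */
            void *mmio;             /* void __iomem * in the kernel */
            unsigned int msr;       /* MSR interface: register number */
    };

    int main(void)
    {
            union rapl_reg reg = { .val = 0 };

            reg.msr = 0x606;        /* e.g. MSR_RAPL_POWER_UNIT */
            printf("populated: %d, msr: 0x%x\n", reg.val != 0, reg.msr);
            return 0;
    }
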
index 569e25e..250bd41 100644 (file)
@@ -34,28 +34,32 @@ static struct rapl_if_priv *rapl_msr_priv;
 
 static struct rapl_if_priv rapl_msr_priv_intel = {
        .type = RAPL_IF_MSR,
-       .reg_unit = MSR_RAPL_POWER_UNIT,
-       .regs[RAPL_DOMAIN_PACKAGE] = {
-               MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO },
-       .regs[RAPL_DOMAIN_PP0] = {
-               MSR_PP0_POWER_LIMIT, MSR_PP0_ENERGY_STATUS, 0, MSR_PP0_POLICY, 0 },
-       .regs[RAPL_DOMAIN_PP1] = {
-               MSR_PP1_POWER_LIMIT, MSR_PP1_ENERGY_STATUS, 0, MSR_PP1_POLICY, 0 },
-       .regs[RAPL_DOMAIN_DRAM] = {
-               MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO },
-       .regs[RAPL_DOMAIN_PLATFORM] = {
-               MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0},
+       .reg_unit.msr = MSR_RAPL_POWER_UNIT,
+       .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_LIMIT].msr   = MSR_PKG_POWER_LIMIT,
+       .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr  = MSR_PKG_ENERGY_STATUS,
+       .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PERF].msr    = MSR_PKG_PERF_STATUS,
+       .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_INFO].msr    = MSR_PKG_POWER_INFO,
+       .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_LIMIT].msr       = MSR_PP0_POWER_LIMIT,
+       .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr      = MSR_PP0_ENERGY_STATUS,
+       .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_POLICY].msr      = MSR_PP0_POLICY,
+       .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_LIMIT].msr       = MSR_PP1_POWER_LIMIT,
+       .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_STATUS].msr      = MSR_PP1_ENERGY_STATUS,
+       .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_POLICY].msr      = MSR_PP1_POLICY,
+       .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_LIMIT].msr      = MSR_DRAM_POWER_LIMIT,
+       .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_STATUS].msr     = MSR_DRAM_ENERGY_STATUS,
+       .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_PERF].msr       = MSR_DRAM_PERF_STATUS,
+       .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_INFO].msr       = MSR_DRAM_POWER_INFO,
+       .regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT].msr  = MSR_PLATFORM_POWER_LIMIT,
+       .regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS].msr = MSR_PLATFORM_ENERGY_STATUS,
        .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
        .limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2),
 };
 
 static struct rapl_if_priv rapl_msr_priv_amd = {
        .type = RAPL_IF_MSR,
-       .reg_unit = MSR_AMD_RAPL_POWER_UNIT,
-       .regs[RAPL_DOMAIN_PACKAGE] = {
-               0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 },
-       .regs[RAPL_DOMAIN_PP0] = {
-               0, MSR_AMD_CORE_ENERGY_STATUS, 0, 0, 0 },
+       .reg_unit.msr = MSR_AMD_RAPL_POWER_UNIT,
+       .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr  = MSR_AMD_PKG_ENERGY_STATUS,
+       .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr      = MSR_AMD_CORE_ENERGY_STATUS,
 };
 
 /* Handles CPU hotplug on multi-socket systems.
@@ -99,10 +103,8 @@ static int rapl_cpu_down_prep(unsigned int cpu)
 
 static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
 {
-       u32 msr = (u32)ra->reg;
-
-       if (rdmsrl_safe_on_cpu(cpu, msr, &ra->value)) {
-               pr_debug("failed to read msr 0x%x on cpu %d\n", msr, cpu);
+       if (rdmsrl_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
+               pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
                return -EIO;
        }
        ra->value &= ra->mask;
@@ -112,17 +114,16 @@ static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
 static void rapl_msr_update_func(void *info)
 {
        struct reg_action *ra = info;
-       u32 msr = (u32)ra->reg;
        u64 val;
 
-       ra->err = rdmsrl_safe(msr, &val);
+       ra->err = rdmsrl_safe(ra->reg.msr, &val);
        if (ra->err)
                return;
 
        val &= ~ra->mask;
        val |= ra->value;
 
-       ra->err = wrmsrl_safe(msr, val);
+       ra->err = wrmsrl_safe(ra->reg.msr, val);
 }
 
 static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
@@ -141,7 +142,7 @@ static const struct x86_cpu_id pl4_support_ids[] = {
        X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL),
@@ -171,7 +172,7 @@ static int rapl_msr_probe(struct platform_device *pdev)
 
        if (id) {
                rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
-               rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] =
+               rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4].msr =
                        MSR_VR_CURRENT_CONFIG;
                pr_info("PL4 support detected.\n");
        }
index 4f4f13d..891c90f 100644 (file)
@@ -59,10 +59,10 @@ static struct powercap_control_type *tpmi_control_type;
 
 static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
 {
-       if (!ra->reg)
+       if (!ra->reg.mmio)
                return -EINVAL;
 
-       ra->value = readq((void __iomem *)ra->reg);
+       ra->value = readq(ra->reg.mmio);
 
        ra->value &= ra->mask;
        return 0;
@@ -72,15 +72,15 @@ static int tpmi_rapl_write_raw(int id, struct reg_action *ra)
 {
        u64 val;
 
-       if (!ra->reg)
+       if (!ra->reg.mmio)
                return -EINVAL;
 
-       val = readq((void __iomem *)ra->reg);
+       val = readq(ra->reg.mmio);
 
        val &= ~ra->mask;
        val |= ra->value;
 
-       writeq(val, (void __iomem *)ra->reg);
+       writeq(val, ra->reg.mmio);
        return 0;
 }
 
@@ -138,8 +138,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
        enum tpmi_rapl_register reg_index;
        enum rapl_domain_reg_id reg_id;
        int tpmi_domain_size, tpmi_domain_flags;
-       u64 *tpmi_rapl_regs = trp->base + offset;
-       u64 tpmi_domain_header = readq((void __iomem *)tpmi_rapl_regs);
+       u64 tpmi_domain_header = readq(trp->base + offset);
 
        /* Domain Parent bits are ignored for now */
        tpmi_domain_version = tpmi_domain_header & 0xff;
@@ -180,7 +179,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
                return -EINVAL;
        }
 
-       if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT]) {
+       if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT].mmio) {
                pr_warn(FW_BUG "Duplicate Domain type %d\n", tpmi_domain_type);
                return -EINVAL;
        }
@@ -218,7 +217,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
                default:
                        continue;
                }
-               trp->priv.regs[domain_type][reg_id] = (u64)&tpmi_rapl_regs[reg_index];
+               trp->priv.regs[domain_type][reg_id].mmio = trp->base + offset + reg_index * 8;
        }
 
        return 0;
index c5dd77b..a062166 100644 (file)
@@ -1028,9 +1028,12 @@ static int da9063_regulator_probe(struct platform_device *pdev)
                        config.of_node = da9063_reg_matches[id].of_node;
                config.regmap = da9063->regmap;
 
-               ret = da9063_check_xvp_constraints(&config);
-               if (ret)
-                       return ret;
+               /* Checking constraints requires init_data from DT. */
+               if (config.init_data) {
+                       ret = da9063_check_xvp_constraints(&config);
+                       if (ret)
+                               return ret;
+               }
 
                regl->rdev = devm_regulator_register(&pdev->dev, &regl->desc,
                                                     &config);
index 31a16fb..b9cda22 100644 (file)
@@ -661,7 +661,7 @@ static int mt6358_sync_vcn33_setting(struct device *dev)
        /* Disable VCN33_WIFI */
        ret = regmap_update_bits(mt6397->regmap, MT6358_LDO_VCN33_CON0_1, BIT(0), 0);
        if (ret) {
-               dev_err(dev, "Failed to disable VCN33_BT\n");
+               dev_err(dev, "Failed to disable VCN33_WIFI\n");
                return ret;
        }
 
@@ -676,10 +676,6 @@ static int mt6358_regulator_probe(struct platform_device *pdev)
        const struct mt6358_regulator_info *mt6358_info;
        int i, max_regulator, ret;
 
-       ret = mt6358_sync_vcn33_setting(&pdev->dev);
-       if (ret)
-               return ret;
-
        if (mt6397->chip_id == MT6366_CHIP_ID) {
                max_regulator = MT6366_MAX_REGULATOR;
                mt6358_info = mt6366_regulators;
@@ -688,6 +684,10 @@ static int mt6358_regulator_probe(struct platform_device *pdev)
                mt6358_info = mt6358_regulators;
        }
 
+       ret = mt6358_sync_vcn33_setting(&pdev->dev);
+       if (ret)
+               return ret;
+
        for (i = 0; i < max_regulator; i++) {
                config.dev = &pdev->dev;
                config.regmap = mt6397->regmap;
index f3b280a..cd077b7 100644 (file)
@@ -1068,7 +1068,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = {
        RPMH_VREG("ldo9",   "ldo%s9",  &pmic5_pldo,    "vdd-l8-l9"),
        RPMH_VREG("ldo10",  "ldo%s10", &pmic5_nldo515,    "vdd-l1-l4-l10"),
        RPMH_VREG("ldo11",  "ldo%s11", &pmic5_nldo515,    "vdd-l11"),
-       RPMH_VREG("ldo12",  "ldo%s12", &pmic5_pldo,    "vdd-l12"),
+       RPMH_VREG("ldo12",  "ldo%s12", &pmic5_nldo515,    "vdd-l12"),
        RPMH_VREG("ldo13",  "ldo%s13", &pmic5_pldo,    "vdd-l2-l13-l14"),
        RPMH_VREG("ldo14",  "ldo%s14", &pmic5_pldo,    "vdd-l2-l13-l14"),
        RPMH_VREG("ldo15",  "ldo%s15", &pmic5_nldo515,    "vdd-l15"),
index edcbf77..215597f 100644 (file)
@@ -2943,41 +2943,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
  * Requeue a request back to the block request queue
  * only works for block requests
  */
-static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
+static void _dasd_requeue_request(struct dasd_ccw_req *cqr)
 {
-       struct dasd_block *block = cqr->block;
        struct request *req;
 
-       if (!block)
-               return -EINVAL;
        /*
         * If the request is an ERP request there is nothing to requeue.
         * This will be done with the remaining original request.
         */
        if (cqr->refers)
-               return 0;
+               return;
        spin_lock_irq(&cqr->dq->lock);
        req = (struct request *) cqr->callback_data;
        blk_mq_requeue_request(req, true);
        spin_unlock_irq(&cqr->dq->lock);
 
-       return 0;
+       return;
 }
 
-/*
- * Go through all request on the dasd_block request queue, cancel them
- * on the respective dasd_device, and return them to the generic
- * block layer.
- */
-static int dasd_flush_block_queue(struct dasd_block *block)
+static int _dasd_requests_to_flushqueue(struct dasd_block *block,
+                                       struct list_head *flush_queue)
 {
        struct dasd_ccw_req *cqr, *n;
-       int rc, i;
-       struct list_head flush_queue;
        unsigned long flags;
+       int rc, i;
 
-       INIT_LIST_HEAD(&flush_queue);
-       spin_lock_bh(&block->queue_lock);
+       spin_lock_irqsave(&block->queue_lock, flags);
        rc = 0;
 restart:
        list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
@@ -2992,13 +2983,32 @@ restart:
                 * is returned from the dasd_device layer.
                 */
                cqr->callback = _dasd_wake_block_flush_cb;
-               for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
-                       list_move_tail(&cqr->blocklist, &flush_queue);
+               for (i = 0; cqr; cqr = cqr->refers, i++)
+                       list_move_tail(&cqr->blocklist, flush_queue);
                if (i > 1)
                        /* moved more than one request - need to restart */
                        goto restart;
        }
-       spin_unlock_bh(&block->queue_lock);
+       spin_unlock_irqrestore(&block->queue_lock, flags);
+
+       return rc;
+}
+
+/*
+ * Go through all request on the dasd_block request queue, cancel them
+ * on the respective dasd_device, and return them to the generic
+ * block layer.
+ */
+static int dasd_flush_block_queue(struct dasd_block *block)
+{
+       struct dasd_ccw_req *cqr, *n;
+       struct list_head flush_queue;
+       unsigned long flags;
+       int rc;
+
+       INIT_LIST_HEAD(&flush_queue);
+       rc = _dasd_requests_to_flushqueue(block, &flush_queue);
+
        /* Now call the callback function of flushed requests */
 restart_cb:
        list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
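
The factored-out _dasd_requests_to_flushqueue() captures a common pattern: splice requests onto a caller-provided local list while holding the queue lock, then run the callbacks with the lock dropped. A minimal sketch of that splice-then-process shape, with the locking reduced to comments and a singly linked list standing in for the ccw queue:

    #include <stdio.h>
    #include <stddef.h>

    struct req { int id; struct req *next; };

    /* In the driver this runs under spin_lock_irqsave(), so the queue is
     * never observed half-moved. */
    static void requests_to_flushqueue(struct req **queue, struct req **local)
    {
            /* lock(queue) */
            *local = *queue;
            *queue = NULL;
            /* unlock(queue) */
    }

    int main(void)
    {
            struct req c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
            struct req *queue = &a, *flush = NULL;

            requests_to_flushqueue(&queue, &flush);

            /* Callbacks run here, without the queue lock held. */
            for (struct req *r = flush; r; r = r->next)
                    printf("flushed request %d\n", r->id);
            return 0;
    }
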
@@ -3626,11 +3636,8 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
                 * so sync bdev first and then wait for our queues to become
                 * empty
                 */
-               if (device->block) {
-                       rc = fsync_bdev(device->block->bdev);
-                       if (rc != 0)
-                               goto interrupted;
-               }
+               if (device->block)
+                       bdev_mark_dead(device->block->bdev, false);
                dasd_schedule_device_bh(device);
                rc = wait_event_interruptible(shutdown_waitq,
                                              _wait_for_empty_queues(device));
@@ -3881,75 +3888,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail);
  */
 int dasd_generic_requeue_all_requests(struct dasd_device *device)
 {
+       struct dasd_block *block = device->block;
        struct list_head requeue_queue;
        struct dasd_ccw_req *cqr, *n;
-       struct dasd_ccw_req *refers;
        int rc;
 
-       INIT_LIST_HEAD(&requeue_queue);
-       spin_lock_irq(get_ccwdev_lock(device->cdev));
-       rc = 0;
-       list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
-               /* Check status and move request to flush_queue */
-               if (cqr->status == DASD_CQR_IN_IO) {
-                       rc = device->discipline->term_IO(cqr);
-                       if (rc) {
-                               /* unable to terminate requeust */
-                               dev_err(&device->cdev->dev,
-                                       "Unable to terminate request %p "
-                                       "on suspend\n", cqr);
-                               spin_unlock_irq(get_ccwdev_lock(device->cdev));
-                               dasd_put_device(device);
-                               return rc;
-                       }
-               }
-               list_move_tail(&cqr->devlist, &requeue_queue);
-       }
-       spin_unlock_irq(get_ccwdev_lock(device->cdev));
-
-       list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
-               wait_event(dasd_flush_wq,
-                          (cqr->status != DASD_CQR_CLEAR_PENDING));
+       if (!block)
+               return 0;
 
-               /*
-                * requeue requests to blocklayer will only work
-                * for block device requests
-                */
-               if (_dasd_requeue_request(cqr))
-                       continue;
+       INIT_LIST_HEAD(&requeue_queue);
+       rc = _dasd_requests_to_flushqueue(block, &requeue_queue);
 
-               /* remove requests from device and block queue */
-               list_del_init(&cqr->devlist);
-               while (cqr->refers != NULL) {
-                       refers = cqr->refers;
-                       /* remove the request from the block queue */
-                       list_del(&cqr->blocklist);
-                       /* free the finished erp request */
-                       dasd_free_erp_request(cqr, cqr->memdev);
-                       cqr = refers;
+       /* Now call the callback function of flushed requests */
+restart_cb:
+       list_for_each_entry_safe(cqr, n, &requeue_queue, blocklist) {
+               wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
+               /* Process finished ERP request. */
+               if (cqr->refers) {
+                       spin_lock_bh(&block->queue_lock);
+                       __dasd_process_erp(block->base, cqr);
+                       spin_unlock_bh(&block->queue_lock);
+                       /* restart the list_for_each_entry_safe loop since
+                        * __dasd_process_erp might remove multiple elements
+                        */
+                       goto restart_cb;
                }
-
-               /*
-                * _dasd_requeue_request already checked for a valid
-                * blockdevice, no need to check again
-                * all erp requests (cqr->refers) have a cqr->block
-                * pointer copy from the original cqr
-                */
+               _dasd_requeue_request(cqr);
                list_del_init(&cqr->blocklist);
                cqr->block->base->discipline->free_cp(
                        cqr, (struct request *) cqr->callback_data);
        }
-
-       /*
-        * if requests remain then they are internal request
-        * and go back to the device queue
-        */
-       if (!list_empty(&requeue_queue)) {
-               /* move freeze_queue to start of the ccw_queue */
-               spin_lock_irq(get_ccwdev_lock(device->cdev));
-               list_splice_tail(&requeue_queue, &device->ccw_queue);
-               spin_unlock_irq(get_ccwdev_lock(device->cdev));
-       }
        dasd_schedule_device_bh(device);
        return rc;
 }
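
Both dasd_flush_block_queue() and dasd_generic_requeue_all_requests() now share
_dasd_requests_to_flushqueue() and follow the same two-phase shape: splice the
requests onto a private list while holding the queue lock, then run the
per-request processing with the lock dropped. The helper also switches from
spin_lock_bh() to spin_lock_irqsave(), so it stays safe when interrupts are
disabled. A minimal sketch of that pattern with generic names (struct item and
the process callback are illustrative, not the real DASD types):

    #include <linux/list.h>
    #include <linux/spinlock.h>

    struct item { struct list_head node; };

    static void collect_then_process(struct list_head *shared, spinlock_t *lock,
                                     void (*process)(struct item *))
    {
            struct item *it, *n;
            unsigned long flags;
            LIST_HEAD(private);

            /* Phase 1: move everything onto a private list under the lock. */
            spin_lock_irqsave(lock, flags);
            list_for_each_entry_safe(it, n, shared, node)
                    list_move_tail(&it->node, &private);
            spin_unlock_irqrestore(lock, flags);

            /* Phase 2: process lock-free; callbacks may sleep or relock. */
            list_for_each_entry_safe(it, n, &private, node) {
                    list_del_init(&it->node);
                    process(it);
            }
    }
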
index 9fd36c4..89957bb 100644 (file)
@@ -1050,7 +1050,7 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
                dev_err(&device->cdev->dev, "An I/O request was rejected"
                        " because writing is inhibited\n");
                erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
-       } else if (sense[7] & SNS7_INVALID_ON_SEC) {
+       } else if (sense[7] == SNS7_INVALID_ON_SEC) {
                dev_err(&device->cdev->dev, "An I/O request was rejected on a copy pair secondary device\n");
                /* suppress dump of sense data for this error */
                set_bit(DASD_CQR_SUPPRESS_CR, &erp->refers->flags);
@@ -2441,7 +2441,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr)
        erp->block    = cqr->block;
        erp->magic    = cqr->magic;
        erp->expires  = cqr->expires;
-       erp->retries  = 256;
+       erp->retries  = device->default_retries;
        erp->buildclk = get_tod_clock();
        erp->status = DASD_CQR_FILLED;
 
index 513a7e6..d558626 100644 (file)
@@ -131,6 +131,7 @@ static int dasd_ioctl_resume(struct dasd_block *block)
        spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
 
        dasd_schedule_block_bh(block);
+       dasd_schedule_device_bh(base);
        return 0;
 }
 
index 09acf38..06bcb6c 100644 (file)
@@ -412,6 +412,7 @@ removeseg:
        }
        list_del(&dev_info->lh);
 
+       dax_remove_host(dev_info->gd);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
        del_gendisk(dev_info->gd);
@@ -707,9 +708,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
        goto out;
 
 out_dax_host:
+       put_device(&dev_info->dev);
        dax_remove_host(dev_info->gd);
 out_dax:
-       put_device(&dev_info->dev);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
 put_dev:
@@ -789,6 +790,7 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch
        }
 
        list_del(&dev_info->lh);
+       dax_remove_host(dev_info->gd);
        kill_dax(dev_info->dax_dev);
        put_dax(dev_info->dax_dev);
        del_gendisk(dev_info->gd);
@@ -860,7 +862,7 @@ dcssblk_submit_bio(struct bio *bio)
        struct bio_vec bvec;
        struct bvec_iter iter;
        unsigned long index;
-       unsigned long page_addr;
+       void *page_addr;
        unsigned long source_addr;
        unsigned long bytes_done;
 
@@ -868,8 +870,8 @@ dcssblk_submit_bio(struct bio *bio)
        dev_info = bio->bi_bdev->bd_disk->private_data;
        if (dev_info == NULL)
                goto fail;
-       if ((bio->bi_iter.bi_sector & 7) != 0 ||
-           (bio->bi_iter.bi_size & 4095) != 0)
+       if (!IS_ALIGNED(bio->bi_iter.bi_sector, 8) ||
+           !IS_ALIGNED(bio->bi_iter.bi_size, PAGE_SIZE))
                /* Request is not page-aligned. */
                goto fail;
        /* verify data transfer direction */
@@ -889,18 +891,16 @@ dcssblk_submit_bio(struct bio *bio)
 
        index = (bio->bi_iter.bi_sector >> 3);
        bio_for_each_segment(bvec, bio, iter) {
-               page_addr = (unsigned long)bvec_virt(&bvec);
+               page_addr = bvec_virt(&bvec);
                source_addr = dev_info->start + (index<<12) + bytes_done;
-               if (unlikely((page_addr & 4095) != 0) || (bvec.bv_len & 4095) != 0)
+               if (unlikely(!IS_ALIGNED((unsigned long)page_addr, PAGE_SIZE) ||
+                            !IS_ALIGNED(bvec.bv_len, PAGE_SIZE)))
                        // More paranoia.
                        goto fail;
-               if (bio_data_dir(bio) == READ) {
-                       memcpy((void*)page_addr, (void*)source_addr,
-                               bvec.bv_len);
-               } else {
-                       memcpy((void*)source_addr, (void*)page_addr,
-                               bvec.bv_len);
-               }
+               if (bio_data_dir(bio) == READ)
+                       memcpy(page_addr, __va(source_addr), bvec.bv_len);
+               else
+                       memcpy(__va(source_addr), page_addr, bvec.bv_len);
                bytes_done += bvec.bv_len;
        }
        bio_endio(bio);
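
Two mechanical cleanups in dcssblk_submit_bio() are worth spelling out: the
open-coded mask tests become IS_ALIGNED(), and the segment's physical address
is translated with __va() instead of being cast to a pointer directly. For
power-of-two alignments, IS_ALIGNED(x, a) from <linux/align.h> expands to
((x & (a - 1)) == 0), so the old and new checks are equivalent; a condensed
restatement in the function's context (assuming a 4 KiB PAGE_SIZE, as on s390):

    #include <linux/align.h>        /* IS_ALIGNED() */

    /* 8 sectors * 512 bytes = one 4 KiB page, hence the alignment of 8. */
    bool old_ok = ((bio->bi_iter.bi_sector & 7) == 0) &&
                  ((bio->bi_iter.bi_size & 4095) == 0);
    bool new_ok = IS_ALIGNED(bio->bi_iter.bi_sector, 8) &&
                  IS_ALIGNED(bio->bi_iter.bi_size, PAGE_SIZE);
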
index 0c1df1d..3a9cc8a 100644 (file)
@@ -134,7 +134,7 @@ static void scm_request_done(struct scm_request *scmrq)
 
                if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
                    IS_ALIGNED(aidaw, PAGE_SIZE))
-                       mempool_free(virt_to_page(aidaw), aidaw_pool);
+                       mempool_free(virt_to_page((void *)aidaw), aidaw_pool);
        }
 
        spin_lock_irqsave(&list_lock, flags);
index 3c87057..8b4575a 100644 (file)
@@ -392,10 +392,6 @@ static void __init add_memory_merged(u16 rn)
                goto skip_add;
        start = rn2addr(first_rn);
        size = (unsigned long long) num * sclp.rzm;
-       if (start >= VMEM_MAX_PHYS)
-               goto skip_add;
-       if (start + size > VMEM_MAX_PHYS)
-               size = VMEM_MAX_PHYS - start;
        if (start >= ident_map_size)
                goto skip_add;
        if (start + size > ident_map_size)
index f480d6c..fdc8668 100644 (file)
@@ -55,6 +55,7 @@ static void __init sclp_early_facilities_detect(void)
                S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_GUEST;
        if (sccb->cpuoff > 134) {
                sclp.has_diag318 = !!(sccb->byte_134 & 0x80);
+               sclp.has_diag320 = !!(sccb->byte_134 & 0x04);
                sclp.has_iplcc = !!(sccb->byte_134 & 0x02);
        }
        if (sccb->cpuoff > 137) {
index 4cebfaa..eb0520a 100644 (file)
@@ -89,7 +89,7 @@ static void vmcp_response_free(struct vmcp_session *session)
        order = get_order(session->bufsize);
        nr_pages = ALIGN(session->bufsize, PAGE_SIZE) >> PAGE_SHIFT;
        if (session->cma_alloc) {
-               page = virt_to_page((unsigned long)session->response);
+               page = virt_to_page(session->response);
                cma_release(vmcp_cma, page, nr_pages);
                session->cma_alloc = 0;
        } else {
index 942c73a..bc3be03 100644 (file)
@@ -3,7 +3,7 @@
  * zcore module to export memory content and register sets for creating system
  * dumps on SCSI/NVMe disks (zfcp/nvme dump).
  *
- * For more information please refer to Documentation/s390/zfcpdump.rst
+ * For more information please refer to Documentation/arch/s390/zfcpdump.rst
  *
  * Copyright IBM Corp. 2003, 2008
  * Author(s): Michael Holzheu
index 22d2db6..0edacd1 100644 (file)
@@ -11,7 +11,7 @@ zcrypt-objs += zcrypt_msgtype6.o zcrypt_msgtype50.o
 zcrypt-objs += zcrypt_ccamisc.o zcrypt_ep11misc.o
 obj-$(CONFIG_ZCRYPT) += zcrypt.o
 # adapter drivers depend on ap.o and zcrypt.o
-obj-$(CONFIG_ZCRYPT) += zcrypt_cex2c.o zcrypt_cex2a.o zcrypt_cex4.o
+obj-$(CONFIG_ZCRYPT) += zcrypt_cex4.o
 
 # pkey kernel module
 pkey-objs := pkey_api.o
index 420120b..339812e 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * Copyright IBM Corp. 2006, 2021
+ * Copyright IBM Corp. 2006, 2023
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  *           Martin Schwidefsky <schwidefsky@de.ibm.com>
  *           Ralph Wuerthner <rwuerthn@de.ibm.com>
@@ -219,6 +219,15 @@ int ap_sb_available(void)
 }
 
 /*
+ * ap_is_se_guest(): Check for SE guest with AP pass-through support.
+ */
+bool ap_is_se_guest(void)
+{
+       return is_prot_virt_guest() && ap_sb_available();
+}
+EXPORT_SYMBOL(ap_is_se_guest);
+
+/*
  * ap_fetch_qci_info(): Fetch cryptographic config info
  *
  * Returns the ap configuration info fetched via PQAP(QCI).
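
The new ap_is_se_guest() helper is the single predicate the pkey changes later
in this patch use to pick the EP11 API level. A condensed restatement of those
call sites:

    /* Secure Execution guests need the newer EP11 API level. */
    int api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;

    rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
                        ZCRYPT_CEX7, api, NULL);
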
@@ -387,23 +396,6 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac,
                *q_ml = tapq_info.ml;
                *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
                *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
-               switch (*q_type) {
-                       /* For CEX2 and CEX3 the available functions
-                        * are not reflected by the facilities bits.
-                        * Instead it is coded into the type. So here
-                        * modify the function bits based on the type.
-                        */
-               case AP_DEVICE_TYPE_CEX2A:
-               case AP_DEVICE_TYPE_CEX3A:
-                       *q_fac |= 0x08000000;
-                       break;
-               case AP_DEVICE_TYPE_CEX2C:
-               case AP_DEVICE_TYPE_CEX3C:
-                       *q_fac |= 0x10000000;
-                       break;
-               default:
-                       break;
-               }
                return 1;
        default:
                /*
@@ -1678,8 +1670,8 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
 {
        int comp_type = 0;
 
-       /* < CEX2A is not supported */
-       if (rawtype < AP_DEVICE_TYPE_CEX2A) {
+       /* < CEX4 is not supported */
+       if (rawtype < AP_DEVICE_TYPE_CEX4) {
                AP_DBF_WARN("%s queue=%02x.%04x unsupported type %d\n",
                            __func__, AP_QID_CARD(qid),
                            AP_QID_QUEUE(qid), rawtype);
@@ -1701,7 +1693,7 @@ static int ap_get_compatible_type(ap_qid_t qid, int rawtype, unsigned int func)
                apinfo.cat = AP_DEVICE_TYPE_CEX8;
                status = ap_qact(qid, 0, &apinfo);
                if (status.response_code == AP_RESPONSE_NORMAL &&
-                   apinfo.cat >= AP_DEVICE_TYPE_CEX2A &&
+                   apinfo.cat >= AP_DEVICE_TYPE_CEX4 &&
                    apinfo.cat <= AP_DEVICE_TYPE_CEX8)
                        comp_type = apinfo.cat;
        }
index 0d7b7eb..be54b07 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * Copyright IBM Corp. 2006, 2019
+ * Copyright IBM Corp. 2006, 2023
  * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
  *           Martin Schwidefsky <schwidefsky@de.ibm.com>
  *           Ralph Wuerthner <rwuerthn@de.ibm.com>
@@ -67,15 +67,8 @@ static inline int ap_test_bit(unsigned int *ptr, unsigned int nr)
 #define AP_RESPONSE_INVALID_DOMAIN          0x42
 
 /*
- * Known device types
+ * Supported AP device types
  */
-#define AP_DEVICE_TYPE_PCICC   3
-#define AP_DEVICE_TYPE_PCICA   4
-#define AP_DEVICE_TYPE_PCIXCC  5
-#define AP_DEVICE_TYPE_CEX2A   6
-#define AP_DEVICE_TYPE_CEX2C   7
-#define AP_DEVICE_TYPE_CEX3A   8
-#define AP_DEVICE_TYPE_CEX3C   9
 #define AP_DEVICE_TYPE_CEX4    10
 #define AP_DEVICE_TYPE_CEX5    11
 #define AP_DEVICE_TYPE_CEX6    12
@@ -272,14 +265,6 @@ static inline void ap_release_message(struct ap_message *ap_msg)
        kfree_sensitive(ap_msg->private);
 }
 
-/*
- * Note: don't use ap_send/ap_recv after using ap_queue_message
- * for the first time. Otherwise the ap message queue will get
- * confused.
- */
-int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen);
-int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen);
-
 enum ap_sm_wait ap_sm_event(struct ap_queue *aq, enum ap_sm_event event);
 enum ap_sm_wait ap_sm_event_loop(struct ap_queue *aq, enum ap_sm_event event);
 
@@ -289,6 +274,7 @@ void ap_flush_queue(struct ap_queue *aq);
 
 void *ap_airq_ptr(void);
 int ap_sb_available(void);
+bool ap_is_se_guest(void);
 void ap_wait(enum ap_sm_wait wait);
 void ap_request_timeout(struct timer_list *t);
 void ap_bus_force_rescan(void);
index 30df837..1336e63 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Copyright IBM Corp. 2016
+ * Copyright IBM Corp. 2016, 2023
  * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  *
  * Adjunct processor bus, queue related code.
@@ -93,51 +93,6 @@ __ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen,
        return ap_nqap(qid, psmid, msg, msglen);
 }
 
-int ap_send(ap_qid_t qid, unsigned long psmid, void *msg, size_t msglen)
-{
-       struct ap_queue_status status;
-
-       status = __ap_send(qid, psmid, msg, msglen, 0);
-       if (status.async)
-               return -EPERM;
-       switch (status.response_code) {
-       case AP_RESPONSE_NORMAL:
-               return 0;
-       case AP_RESPONSE_Q_FULL:
-       case AP_RESPONSE_RESET_IN_PROGRESS:
-               return -EBUSY;
-       case AP_RESPONSE_REQ_FAC_NOT_INST:
-               return -EINVAL;
-       default:        /* Device is gone. */
-               return -ENODEV;
-       }
-}
-EXPORT_SYMBOL(ap_send);
-
-int ap_recv(ap_qid_t qid, unsigned long *psmid, void *msg, size_t msglen)
-{
-       struct ap_queue_status status;
-
-       if (!msg)
-               return -EINVAL;
-       status = ap_dqap(qid, psmid, msg, msglen, NULL, NULL, NULL);
-       if (status.async)
-               return -EPERM;
-       switch (status.response_code) {
-       case AP_RESPONSE_NORMAL:
-               return 0;
-       case AP_RESPONSE_NO_PENDING_REPLY:
-               if (status.queue_empty)
-                       return -ENOENT;
-               return -EBUSY;
-       case AP_RESPONSE_RESET_IN_PROGRESS:
-               return -EBUSY;
-       default:
-               return -ENODEV;
-       }
-}
-EXPORT_SYMBOL(ap_recv);
-
 /* State machine definitions and helpers */
 
 static enum ap_sm_wait ap_sm_nop(struct ap_queue *aq)
index e58bfd2..6cfb6b2 100644 (file)
@@ -263,7 +263,9 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen,
 
        /* build a list of apqns suitable for ep11 keys with cpacf support */
        rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-                           ZCRYPT_CEX7, EP11_API_V, NULL);
+                           ZCRYPT_CEX7,
+                           ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
+                           NULL);
        if (rc)
                goto out;
 
@@ -272,7 +274,8 @@ static int pkey_clr2ep11key(const u8 *clrkey, size_t clrkeylen,
                card = apqns[i] >> 16;
                dom = apqns[i] & 0xFFFF;
                rc = ep11_clr2keyblob(card, dom, clrkeylen * 8,
-                                     0, clrkey, keybuf, keybuflen);
+                                     0, clrkey, keybuf, keybuflen,
+                                     PKEY_TYPE_EP11);
                if (rc == 0)
                        break;
        }
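
pkey_clr2ep11key() shows the loop shape that recurs throughout the pkey
helpers: build a candidate APQN list with ep11_findcard2(), then try each
(card, domain) pair until one succeeds. A trimmed sketch; try_one_apqn() is a
hypothetical stand-in for the per-APQN call (ep11_clr2keyblob() here,
ep11_genaeskey() or ep11_kblob2protkey() elsewhere):

    u32 nr_apqns, *apqns = NULL;
    u16 card, dom;
    int i, rc;

    rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF, ZCRYPT_CEX7,
                        ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4, NULL);
    if (rc)
            return rc;

    /* First APQN that can do the job wins; -ENODEV if none can. */
    for (rc = -ENODEV, i = 0; i < nr_apqns; i++) {
            card = apqns[i] >> 16;
            dom = apqns[i] & 0xFFFF;
            rc = try_one_apqn(card, dom);   /* hypothetical per-APQN step */
            if (rc == 0)
                    break;
    }
    kfree(apqns);
    return rc;
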
@@ -287,10 +290,9 @@ out:
 /*
  * Find card and transform EP11 secure key into protected key.
  */
-static int pkey_ep11key2pkey(const u8 *key, u8 *protkey,
-                            u32 *protkeylen, u32 *protkeytype)
+static int pkey_ep11key2pkey(const u8 *key, size_t keylen,
+                            u8 *protkey, u32 *protkeylen, u32 *protkeytype)
 {
-       struct ep11keyblob *kb = (struct ep11keyblob *)key;
        u32 nr_apqns, *apqns = NULL;
        u16 card, dom;
        int i, rc;
@@ -299,7 +301,9 @@ static int pkey_ep11key2pkey(const u8 *key, u8 *protkey,
 
        /* build a list of apqns suitable for this key */
        rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-                           ZCRYPT_CEX7, EP11_API_V, kb->wkvp);
+                           ZCRYPT_CEX7,
+                           ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
+                           ep11_kb_wkvp(key, keylen));
        if (rc)
                goto out;
 
@@ -307,7 +311,7 @@ static int pkey_ep11key2pkey(const u8 *key, u8 *protkey,
        for (rc = -ENODEV, i = 0; i < nr_apqns; i++) {
                card = apqns[i] >> 16;
                dom = apqns[i] & 0xFFFF;
-               rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
+               rc = ep11_kblob2protkey(card, dom, key, keylen,
                                        protkey, protkeylen, protkeytype);
                if (rc == 0)
                        break;
@@ -495,7 +499,7 @@ try_via_ep11:
                              tmpbuf, &tmpbuflen);
        if (rc)
                goto failure;
-       rc = pkey_ep11key2pkey(tmpbuf,
+       rc = pkey_ep11key2pkey(tmpbuf, tmpbuflen,
                               protkey, protkeylen, protkeytype);
        if (!rc)
                goto out;
@@ -611,7 +615,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
                rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
                if (rc)
                        goto out;
-               rc = pkey_ep11key2pkey(key,
+               rc = pkey_ep11key2pkey(key, keylen,
                                       protkey, protkeylen, protkeytype);
                break;
        }
@@ -620,7 +624,7 @@ static int pkey_nonccatok2pkey(const u8 *key, u32 keylen,
                rc = ep11_check_aes_key_with_hdr(debug_info, 3, key, keylen, 1);
                if (rc)
                        goto out;
-               rc = pkey_ep11key2pkey(key + sizeof(struct ep11kblob_header),
+               rc = pkey_ep11key2pkey(key, keylen,
                                       protkey, protkeylen, protkeytype);
                break;
        default:
@@ -713,6 +717,11 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
                if (*keybufsize < MINEP11AESKEYBLOBSIZE)
                        return -EINVAL;
                break;
+       case PKEY_TYPE_EP11_AES:
+               if (*keybufsize < (sizeof(struct ep11kblob_header) +
+                                  MINEP11AESKEYBLOBSIZE))
+                       return -EINVAL;
+               break;
        default:
                return -EINVAL;
        }
@@ -729,9 +738,10 @@ static int pkey_genseckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
        for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
                card = apqns[i].card;
                dom = apqns[i].domain;
-               if (ktype == PKEY_TYPE_EP11) {
+               if (ktype == PKEY_TYPE_EP11 ||
+                   ktype == PKEY_TYPE_EP11_AES) {
                        rc = ep11_genaeskey(card, dom, ksize, kflags,
-                                           keybuf, keybufsize);
+                                           keybuf, keybufsize, ktype);
                } else if (ktype == PKEY_TYPE_CCA_DATA) {
                        rc = cca_genseckey(card, dom, ksize, keybuf);
                        *keybufsize = (rc ? 0 : SECKEYBLOBSIZE);
@@ -769,6 +779,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
                if (*keybufsize < MINEP11AESKEYBLOBSIZE)
                        return -EINVAL;
                break;
+       case PKEY_TYPE_EP11_AES:
+               if (*keybufsize < (sizeof(struct ep11kblob_header) +
+                                  MINEP11AESKEYBLOBSIZE))
+                       return -EINVAL;
+               break;
        default:
                return -EINVAL;
        }
@@ -787,9 +802,11 @@ static int pkey_clr2seckey2(const struct pkey_apqn *apqns, size_t nr_apqns,
        for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
                card = apqns[i].card;
                dom = apqns[i].domain;
-               if (ktype == PKEY_TYPE_EP11) {
+               if (ktype == PKEY_TYPE_EP11 ||
+                   ktype == PKEY_TYPE_EP11_AES) {
                        rc = ep11_clr2keyblob(card, dom, ksize, kflags,
-                                             clrkey, keybuf, keybufsize);
+                                             clrkey, keybuf, keybufsize,
+                                             ktype);
                } else if (ktype == PKEY_TYPE_CCA_DATA) {
                        rc = cca_clr2seckey(card, dom, ksize,
                                            clrkey, keybuf);
@@ -888,6 +905,7 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
        } else if (hdr->type == TOKTYPE_NON_CCA &&
                   hdr->version == TOKVER_EP11_AES) {
                struct ep11keyblob *kb = (struct ep11keyblob *)key;
+               int api;
 
                rc = ep11_check_aes_key(debug_info, 3, key, keylen, 1);
                if (rc)
@@ -895,10 +913,12 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
                if (ktype)
                        *ktype = PKEY_TYPE_EP11;
                if (ksize)
-                       *ksize = kb->head.keybitlen;
+                       *ksize = kb->head.bitlen;
 
+               api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
                rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
-                                   ZCRYPT_CEX7, EP11_API_V, kb->wkvp);
+                                   ZCRYPT_CEX7, api,
+                                   ep11_kb_wkvp(key, keylen));
                if (rc)
                        goto out;
 
@@ -908,6 +928,32 @@ static int pkey_verifykey2(const u8 *key, size_t keylen,
                *cardnr = ((struct pkey_apqn *)_apqns)->card;
                *domain = ((struct pkey_apqn *)_apqns)->domain;
 
+       } else if (hdr->type == TOKTYPE_NON_CCA &&
+                  hdr->version == TOKVER_EP11_AES_WITH_HEADER) {
+               struct ep11kblob_header *kh = (struct ep11kblob_header *)key;
+               int api;
+
+               rc = ep11_check_aes_key_with_hdr(debug_info, 3,
+                                                key, keylen, 1);
+               if (rc)
+                       goto out;
+               if (ktype)
+                       *ktype = PKEY_TYPE_EP11_AES;
+               if (ksize)
+                       *ksize = kh->bitlen;
+
+               api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
+               rc = ep11_findcard2(&_apqns, &_nr_apqns, *cardnr, *domain,
+                                   ZCRYPT_CEX7, api,
+                                   ep11_kb_wkvp(key, keylen));
+               if (rc)
+                       goto out;
+
+               if (flags)
+                       *flags = PKEY_FLAGS_MATCH_CUR_MKVP;
+
+               *cardnr = ((struct pkey_apqn *)_apqns)->card;
+               *domain = ((struct pkey_apqn *)_apqns)->domain;
        } else {
                rc = -EINVAL;
        }
@@ -949,10 +995,12 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
                }
        } else if (hdr->type == TOKTYPE_NON_CCA) {
                if (hdr->version == TOKVER_EP11_AES) {
-                       if (keylen < sizeof(struct ep11keyblob))
-                               return -EINVAL;
                        if (ep11_check_aes_key(debug_info, 3, key, keylen, 1))
                                return -EINVAL;
+               } else if (hdr->version == TOKVER_EP11_AES_WITH_HEADER) {
+                       if (ep11_check_aes_key_with_hdr(debug_info, 3,
+                                                       key, keylen, 1))
+                               return -EINVAL;
                } else {
                        return pkey_nonccatok2pkey(key, keylen,
                                                   protkey, protkeylen,
@@ -980,10 +1028,7 @@ static int pkey_keyblob2pkey2(const struct pkey_apqn *apqns, size_t nr_apqns,
                                                protkey, protkeylen,
                                                protkeytype);
                } else {
-                       /* EP11 AES secure key blob */
-                       struct ep11keyblob *kb = (struct ep11keyblob *)key;
-
-                       rc = ep11_kblob2protkey(card, dom, key, kb->head.len,
+                       rc = ep11_kblob2protkey(card, dom, key, keylen,
                                                protkey, protkeylen,
                                                protkeytype);
                }
@@ -1018,7 +1063,7 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
                        return -EINVAL;
                if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) {
                        minhwtype = ZCRYPT_CEX7;
-                       api = EP11_API_V;
+                       api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
                }
                rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
                                    minhwtype, api, kb->wkvp);
@@ -1034,7 +1079,7 @@ static int pkey_apqns4key(const u8 *key, size_t keylen, u32 flags,
                        return -EINVAL;
                if (kb->attr & EP11_BLOB_PKEY_EXTRACTABLE) {
                        minhwtype = ZCRYPT_CEX7;
-                       api = EP11_API_V;
+                       api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
                }
                rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
                                    minhwtype, api, kb->wkvp);
@@ -1144,11 +1189,13 @@ static int pkey_apqns4keytype(enum pkey_key_type ktype,
                   ktype == PKEY_TYPE_EP11_AES ||
                   ktype == PKEY_TYPE_EP11_ECC) {
                u8 *wkvp = NULL;
+               int api;
 
                if (flags & PKEY_FLAGS_MATCH_CUR_MKVP)
                        wkvp = cur_mkvp;
+               api = ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4;
                rc = ep11_findcard2(&_apqns, &_nr_apqns, 0xFFFF, 0xFFFF,
-                                   ZCRYPT_CEX7, EP11_API_V, wkvp);
+                                   ZCRYPT_CEX7, api, wkvp);
                if (rc)
                        goto out;
 
@@ -1243,12 +1290,14 @@ static int pkey_keyblob2pkey3(const struct pkey_apqn *apqns, size_t nr_apqns,
                     hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
                    is_ep11_keyblob(key + sizeof(struct ep11kblob_header)))
                        rc = ep11_kblob2protkey(card, dom, key, hdr->len,
-                                               protkey, protkeylen, protkeytype);
+                                               protkey, protkeylen,
+                                               protkeytype);
                else if (hdr->type == TOKTYPE_NON_CCA &&
                         hdr->version == TOKVER_EP11_AES &&
                         is_ep11_keyblob(key))
                        rc = ep11_kblob2protkey(card, dom, key, hdr->len,
-                                               protkey, protkeylen, protkeytype);
+                                               protkey, protkeylen,
+                                               protkeytype);
                else if (hdr->type == TOKTYPE_CCA_INTERNAL &&
                         hdr->version == TOKVER_CCA_AES)
                        rc = cca_sec2protkey(card, dom, key, protkey,
@@ -1466,7 +1515,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                apqns = _copy_apqns_from_user(kgs.apqns, kgs.apqn_entries);
                if (IS_ERR(apqns))
                        return PTR_ERR(apqns);
-               kkey = kmalloc(klen, GFP_KERNEL);
+               kkey = kzalloc(klen, GFP_KERNEL);
                if (!kkey) {
                        kfree(apqns);
                        return -ENOMEM;
@@ -1508,7 +1557,7 @@ static long pkey_unlocked_ioctl(struct file *filp, unsigned int cmd,
                apqns = _copy_apqns_from_user(kcs.apqns, kcs.apqn_entries);
                if (IS_ERR(apqns))
                        return PTR_ERR(apqns);
-               kkey = kmalloc(klen, GFP_KERNEL);
+               kkey = kzalloc(klen, GFP_KERNEL);
                if (!kkey) {
                        kfree(apqns);
                        return -ENOMEM;
@@ -2102,7 +2151,7 @@ static struct attribute_group ccacipher_attr_group = {
  * (i.e. off != 0 or count < key blob size) -EINVAL is returned.
  * This function and the sysfs attributes using it provide EP11 key blobs
  * padded to the upper limit of MAXEP11AESKEYBLOBSIZE which is currently
- * 320 bytes.
+ * 336 bytes.
  */
 static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
                                       bool is_xts, char *buf, loff_t off,
@@ -2120,7 +2169,9 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
 
        /* build a list of apqns able to generate an cipher key */
        rc = ep11_findcard2(&apqns, &nr_apqns, 0xFFFF, 0xFFFF,
-                           ZCRYPT_CEX7, EP11_API_V, NULL);
+                           ZCRYPT_CEX7,
+                           ap_is_se_guest() ? EP11_API_V6 : EP11_API_V4,
+                           NULL);
        if (rc)
                return rc;
 
@@ -2130,7 +2181,8 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
        for (i = 0, rc = -ENODEV; i < nr_apqns; i++) {
                card = apqns[i] >> 16;
                dom = apqns[i] & 0xFFFF;
-               rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize);
+               rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize,
+                                   PKEY_TYPE_EP11_AES);
                if (rc == 0)
                        break;
        }
@@ -2140,7 +2192,8 @@ static ssize_t pkey_ep11_aes_attr_read(enum pkey_key_size keybits,
        if (is_xts) {
                keysize = MAXEP11AESKEYBLOBSIZE;
                buf += MAXEP11AESKEYBLOBSIZE;
-               rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize);
+               rc = ep11_genaeskey(card, dom, keybits, 0, buf, &keysize,
+                                   PKEY_TYPE_EP11_AES);
                if (rc == 0)
                        return 2 * MAXEP11AESKEYBLOBSIZE;
        }
index b441745..0509f80 100644 (file)
 #define AP_QUEUE_UNASSIGNED "unassigned"
 #define AP_QUEUE_IN_USE "in use"
 
-#define MAX_RESET_CHECK_WAIT   200     /* Sleep max 200ms for reset check      */
 #define AP_RESET_INTERVAL              20      /* Reset sleep interval (20ms)          */
 
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable);
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
 static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q);
 
 /**
  * get_update_locks_for_kvm: Acquire the locks required to dynamically update a
@@ -360,6 +359,28 @@ static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, dma_addr_t *nib)
        return 0;
 }
 
+static int ensure_nib_shared(unsigned long addr, struct gmap *gmap)
+{
+       int ret;
+
+       /*
+        * The nib has to be located in shared storage since guest and
+        * host access it. vfio_pin_pages() will do a pin shared and
+        * if that fails (possibly because it's not a shared page) it
+        * calls export. We try to do a second pin shared here so that
+        * the UV gives us an error code if we try to pin a non-shared
+        * page.
+        *
+        * If the page is already pinned shared, the UV returns success.
+        */
+       ret = uv_pin_shared(addr);
+       if (ret) {
+               /* vfio_pin_pages() likely exported the page so let's re-import */
+               gmap_convert_to_secure(gmap, addr);
+       }
+       return ret;
+}
+
 /**
  * vfio_ap_irq_enable - Enable Interruption for a APQN
  *
@@ -423,6 +444,14 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
        h_nib = page_to_phys(h_page) | (nib & ~PAGE_MASK);
        aqic_gisa.gisc = isc;
 
+       /* A NIB in non-shared storage yields rc 6 for PV guests */
+       if (kvm_s390_pv_cpu_is_protected(vcpu) &&
+           ensure_nib_shared(h_nib & PAGE_MASK, kvm->arch.gmap)) {
+               vfio_unpin_pages(&q->matrix_mdev->vdev, nib, 1);
+               status.response_code = AP_RESPONSE_INVALID_ADDRESS;
+               return status;
+       }
+
        nisc = kvm_s390_gisc_register(kvm, isc);
        if (nisc < 0) {
                VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
@@ -675,7 +704,7 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
                         */
                        apqn = AP_MKQID(apid, apqi);
                        q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
-                       if (!q || q->reset_rc) {
+                       if (!q || q->reset_status.response_code) {
                                clear_bit_inv(apid,
                                              matrix_mdev->shadow_apcb.apm);
                                break;
@@ -1608,19 +1637,21 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
 {
        switch (status->response_code) {
        case AP_RESPONSE_NORMAL:
+       case AP_RESPONSE_DECONFIGURED:
+               return 0;
        case AP_RESPONSE_RESET_IN_PROGRESS:
-               if (status->queue_empty && !status->irq_enabled)
-                       return 0;
+       case AP_RESPONSE_BUSY:
                return -EBUSY;
-       case AP_RESPONSE_DECONFIGURED:
+       case AP_RESPONSE_ASSOC_SECRET_NOT_UNIQUE:
+       case AP_RESPONSE_ASSOC_FAILED:
                /*
-                * If the AP queue is deconfigured, any subsequent AP command
-                * targeting the queue will fail with the same response code. On the
-                * other hand, when an AP adapter is deconfigured, the associated
-                * queues are reset, so let's return a value indicating the reset
-                * for which we're waiting completed successfully.
+                * These asynchronous response codes indicate a PQAP(AAPQ)
+                * instruction to associate a secret with the guest failed. All
+                * subsequent AP instructions will end with the asynchronous
+                * response code until the AP queue is reset; so, let's return
+                * a value indicating a reset needs to be performed again.
                 */
-               return 0;
+               return -EAGAIN;
        default:
                WARN(true,
                     "failed to verify reset of queue %02x.%04x: TAPQ rc=%u\n",
@@ -1630,91 +1661,105 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
        }
 }
 
-static int apq_reset_check(struct vfio_ap_queue *q)
+#define WAIT_MSG "Waited %dms for reset of queue %02x.%04x (%u, %u, %u)"
+
+static void apq_reset_check(struct work_struct *reset_work)
 {
-       int ret;
-       int iters = MAX_RESET_CHECK_WAIT / AP_RESET_INTERVAL;
+       int ret = -EBUSY, elapsed = 0;
        struct ap_queue_status status;
+       struct vfio_ap_queue *q;
 
-       for (; iters > 0; iters--) {
+       q = container_of(reset_work, struct vfio_ap_queue, reset_work);
+       memcpy(&status, &q->reset_status, sizeof(status));
+       while (true) {
                msleep(AP_RESET_INTERVAL);
+               elapsed += AP_RESET_INTERVAL;
                status = ap_tapq(q->apqn, NULL);
                ret = apq_status_check(q->apqn, &status);
-               if (ret != -EBUSY)
-                       return ret;
+               if (ret == -EIO)
+                       return;
+               if (ret == -EBUSY) {
+                       pr_notice_ratelimited(WAIT_MSG, elapsed,
+                                             AP_QID_CARD(q->apqn),
+                                             AP_QID_QUEUE(q->apqn),
+                                             status.response_code,
+                                             status.queue_empty,
+                                             status.irq_enabled);
+               } else {
+                       if (q->reset_status.response_code == AP_RESPONSE_RESET_IN_PROGRESS ||
+                           q->reset_status.response_code == AP_RESPONSE_BUSY ||
+                           q->reset_status.response_code == AP_RESPONSE_STATE_CHANGE_IN_PROGRESS ||
+                           ret == -EAGAIN) {
+                               status = ap_zapq(q->apqn, 0);
+                               memcpy(&q->reset_status, &status, sizeof(status));
+                               continue;
+                       }
+                       /*
+                        * When an AP adapter is deconfigured, the
+                        * associated queues are reset, so let's set the
+                        * status response code to 0 so the queue may be
+                        * passed through (i.e., not filtered)
+                        */
+                       if (status.response_code == AP_RESPONSE_DECONFIGURED)
+                               q->reset_status.response_code = 0;
+                       if (q->saved_isc != VFIO_AP_ISC_INVALID)
+                               vfio_ap_free_aqic_resources(q);
+                       break;
+               }
        }
-       WARN_ONCE(iters <= 0,
-                 "timeout verifying reset of queue %02x.%04x (%u, %u, %u)",
-                 AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
-                 status.queue_empty, status.irq_enabled, status.response_code);
-       return ret;
 }
 
-static int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
+static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 {
        struct ap_queue_status status;
-       int ret;
 
        if (!q)
-               return 0;
-retry_zapq:
+               return;
        status = ap_zapq(q->apqn, 0);
-       q->reset_rc = status.response_code;
+       memcpy(&q->reset_status, &status, sizeof(status));
        switch (status.response_code) {
        case AP_RESPONSE_NORMAL:
-               ret = 0;
-               /* if the reset has not completed, wait for it to take effect */
-               if (!status.queue_empty || status.irq_enabled)
-                       ret = apq_reset_check(q);
-               break;
        case AP_RESPONSE_RESET_IN_PROGRESS:
+       case AP_RESPONSE_BUSY:
+       case AP_RESPONSE_STATE_CHANGE_IN_PROGRESS:
                /*
-                * There is a reset issued by another process in progress. Let's wait
-                * for that to complete. Since we have no idea whether it was a RAPQ or
-                * ZAPQ, then if it completes successfully, let's issue the ZAPQ.
+                * Verify on a work queue whether the ZAPQ completed successfully.
                 */
-               ret = apq_reset_check(q);
-               if (ret)
-                       break;
-               goto retry_zapq;
+               queue_work(system_long_wq, &q->reset_work);
+               break;
        case AP_RESPONSE_DECONFIGURED:
                /*
                 * When an AP adapter is deconfigured, the associated
-                * queues are reset, so let's return a value indicating the reset
-                * completed successfully.
+                * queues are reset, so let's set the status response code to 0
+                * so the queue may be passed through (i.e., not filtered).
                 */
-               ret = 0;
+               q->reset_status.response_code = 0;
+               vfio_ap_free_aqic_resources(q);
                break;
        default:
                WARN(true,
                     "PQAP/ZAPQ for %02x.%04x failed with invalid rc=%u\n",
                     AP_QID_CARD(q->apqn), AP_QID_QUEUE(q->apqn),
                     status.response_code);
-               return -EIO;
        }
-
-       vfio_ap_free_aqic_resources(q);
-
-       return ret;
 }
 
 static int vfio_ap_mdev_reset_queues(struct ap_queue_table *qtable)
 {
-       int ret, loop_cursor, rc = 0;
+       int ret = 0, loop_cursor;
        struct vfio_ap_queue *q;
 
+       hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode)
+               vfio_ap_mdev_reset_queue(q);
+
        hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
-               ret = vfio_ap_mdev_reset_queue(q);
-               /*
-                * Regardless whether a queue turns out to be busy, or
-                * is not operational, we need to continue resetting
-                * the remaining queues.
-                */
-               if (ret)
-                       rc = ret;
+               flush_work(&q->reset_work);
+
+               if (q->reset_status.response_code)
+                       ret = -EIO;
        }
 
-       return rc;
+       return ret;
 }
 
 static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
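
The reset rework replaces synchronous polling with a per-queue work item:
vfio_ap_mdev_reset_queue() issues the ZAPQ and, when the queue is still busy,
defers verification to apq_reset_check() on system_long_wq, while
vfio_ap_mdev_reset_queues() first kicks off every reset and only then waits
with flush_work(), so the queues reset in parallel instead of one after
another. A minimal sketch of this fan-out/fan-in idiom (struct qstate and
verify_reset() are illustrative names, not the vfio_ap types):

    #include <linux/workqueue.h>

    struct qstate {
            struct work_struct reset_work;
            int response_code;              /* 0 = reset verified */
    };

    static void verify_reset(struct work_struct *work)
    {
            struct qstate *q = container_of(work, struct qstate, reset_work);

            /* Poll the hardware here; placeholder result for the sketch. */
            q->response_code = 0;
    }

    static int reset_all(struct qstate *qs, int n)
    {
            int i, ret = 0;

            for (i = 0; i < n; i++) {       /* fan out: start every reset */
                    INIT_WORK(&qs[i].reset_work, verify_reset);
                    queue_work(system_long_wq, &qs[i].reset_work);
            }
            for (i = 0; i < n; i++) {       /* fan in: wait for each */
                    flush_work(&qs[i].reset_work);
                    if (qs[i].response_code)
                            ret = -EIO;
            }
            return ret;
    }
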
@@ -2038,6 +2083,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev)
 
        q->apqn = to_ap_queue(&apdev->device)->qid;
        q->saved_isc = VFIO_AP_ISC_INVALID;
+       memset(&q->reset_status, 0, sizeof(q->reset_status));
+       INIT_WORK(&q->reset_work, apq_reset_check);
        matrix_mdev = get_update_locks_by_apqn(q->apqn);
 
        if (matrix_mdev) {
@@ -2087,6 +2134,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev)
        }
 
        vfio_ap_mdev_reset_queue(q);
+       flush_work(&q->reset_work);
        dev_set_drvdata(&apdev->device, NULL);
        kfree(q);
        release_update_locks_for_mdev(matrix_mdev);
index 4642bbd..88aff8b 100644 (file)
@@ -133,7 +133,8 @@ struct ap_matrix_mdev {
  * @apqn: the APQN of the AP queue device
  * @saved_isc: the guest ISC registered with the GIB interface
  * @mdev_qnode: allows the vfio_ap_queue struct to be added to a hashtable
- * @reset_rc: the status response code from the last reset of the queue
+ * @reset_status: the status from the last reset of the queue
+ * @reset_work: work to wait for queue reset to complete
  */
 struct vfio_ap_queue {
        struct ap_matrix_mdev *matrix_mdev;
@@ -142,7 +143,8 @@ struct vfio_ap_queue {
 #define VFIO_AP_ISC_INVALID 0xff
        unsigned char saved_isc;
        struct hlist_node mdev_qnode;
-       unsigned int reset_rc;
+       struct ap_queue_status reset_status;
+       struct work_struct reset_work;
 };
 
 int vfio_ap_mdev_register(void);
index 83f692c..e69de29 100644 (file)
@@ -1,227 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *  Copyright IBM Corp. 2001, 2012
- *  Author(s): Robert Burroughs
- *            Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *                               Ralph Wuerthner <rwuerthn@de.ibm.com>
- *  MSGTYPE restruct:            Holger Dengler <hd@linux.vnet.ibm.com>
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/mod_devicetable.h>
-
-#include "ap_bus.h"
-#include "zcrypt_api.h"
-#include "zcrypt_error.h"
-#include "zcrypt_cex2a.h"
-#include "zcrypt_msgtype50.h"
-
-#define CEX2A_MIN_MOD_SIZE       1     /*    8 bits    */
-#define CEX2A_MAX_MOD_SIZE     256     /* 2048 bits    */
-#define CEX3A_MIN_MOD_SIZE     CEX2A_MIN_MOD_SIZE
-#define CEX3A_MAX_MOD_SIZE     512     /* 4096 bits    */
-
-#define CEX2A_MAX_MESSAGE_SIZE 0x390   /* sizeof(struct type50_crb2_msg)    */
-#define CEX2A_MAX_RESPONSE_SIZE 0x110  /* max outputdatalength + type80_hdr */
-
-#define CEX3A_MAX_RESPONSE_SIZE        0x210   /* 512 bit modulus
-                                        * (max outputdatalength) +
-                                        * type80_hdr
-                                        */
-#define CEX3A_MAX_MESSAGE_SIZE sizeof(struct type50_crb3_msg)
-
-#define CEX2A_CLEANUP_TIME     (15 * HZ)
-#define CEX3A_CLEANUP_TIME     CEX2A_CLEANUP_TIME
-
-MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("CEX2A/CEX3A Cryptographic Coprocessor device driver, " \
-                  "Copyright IBM Corp. 2001, 2018");
-MODULE_LICENSE("GPL");
-
-static struct ap_device_id zcrypt_cex2a_card_ids[] = {
-       { .dev_type = AP_DEVICE_TYPE_CEX2A,
-         .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-       { .dev_type = AP_DEVICE_TYPE_CEX3A,
-         .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-       { /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_card_ids);
-
-static struct ap_device_id zcrypt_cex2a_queue_ids[] = {
-       { .dev_type = AP_DEVICE_TYPE_CEX2A,
-         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-       { .dev_type = AP_DEVICE_TYPE_CEX3A,
-         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-       { /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_queue_ids);
-
-/*
- * Probe function for CEX2A card devices. It always accepts the AP device
- * since the bus_match already checked the card type.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
-{
-       /*
-        * Normalized speed ratings per crypto adapter
-        * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
-        */
-       static const int CEX2A_SPEED_IDX[] = {
-               800, 1000, 2000,  900, 1200, 2400, 0, 0};
-       static const int CEX3A_SPEED_IDX[] = {
-               400,  500, 1000,  450,  550, 1200, 0, 0};
-
-       struct ap_card *ac = to_ap_card(&ap_dev->device);
-       struct zcrypt_card *zc;
-       int rc = 0;
-
-       zc = zcrypt_card_alloc();
-       if (!zc)
-               return -ENOMEM;
-       zc->card = ac;
-       dev_set_drvdata(&ap_dev->device, zc);
-
-       if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX2A) {
-               zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
-               zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
-               zc->speed_rating = CEX2A_SPEED_IDX;
-               zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
-               zc->type_string = "CEX2A";
-               zc->user_space_type = ZCRYPT_CEX2A;
-       } else if (ac->ap_dev.device_type == AP_DEVICE_TYPE_CEX3A) {
-               zc->min_mod_size = CEX2A_MIN_MOD_SIZE;
-               zc->max_mod_size = CEX2A_MAX_MOD_SIZE;
-               zc->max_exp_bit_length = CEX2A_MAX_MOD_SIZE;
-               if (ap_test_bit(&ac->functions, AP_FUNC_MEX4K) &&
-                   ap_test_bit(&ac->functions, AP_FUNC_CRT4K)) {
-                       zc->max_mod_size = CEX3A_MAX_MOD_SIZE;
-                       zc->max_exp_bit_length = CEX3A_MAX_MOD_SIZE;
-               }
-               zc->speed_rating = CEX3A_SPEED_IDX;
-               zc->type_string = "CEX3A";
-               zc->user_space_type = ZCRYPT_CEX3A;
-       } else {
-               zcrypt_card_free(zc);
-               return -ENODEV;
-       }
-       zc->online = 1;
-
-       rc = zcrypt_card_register(zc);
-       if (rc)
-               zcrypt_card_free(zc);
-
-       return rc;
-}
-
-/*
- * This is called to remove the CEX2A card driver information
- * if an AP card device is removed.
- */
-static void zcrypt_cex2a_card_remove(struct ap_device *ap_dev)
-{
-       struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
-
-       zcrypt_card_unregister(zc);
-}
-
-static struct ap_driver zcrypt_cex2a_card_driver = {
-       .probe = zcrypt_cex2a_card_probe,
-       .remove = zcrypt_cex2a_card_remove,
-       .ids = zcrypt_cex2a_card_ids,
-       .flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-/*
- * Probe function for CEX2A queue devices. It always accepts the AP device
- * since the bus_match already checked the queue type.
- * @ap_dev: pointer to the AP device.
- */
-static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
-{
-       struct ap_queue *aq = to_ap_queue(&ap_dev->device);
-       struct zcrypt_queue *zq = NULL;
-       int rc;
-
-       switch (ap_dev->device_type) {
-       case AP_DEVICE_TYPE_CEX2A:
-               zq = zcrypt_queue_alloc(CEX2A_MAX_RESPONSE_SIZE);
-               if (!zq)
-                       return -ENOMEM;
-               break;
-       case AP_DEVICE_TYPE_CEX3A:
-               zq = zcrypt_queue_alloc(CEX3A_MAX_RESPONSE_SIZE);
-               if (!zq)
-                       return -ENOMEM;
-               break;
-       }
-       if (!zq)
-               return -ENODEV;
-       zq->ops = zcrypt_msgtype(MSGTYPE50_NAME, MSGTYPE50_VARIANT_DEFAULT);
-       zq->queue = aq;
-       zq->online = 1;
-       atomic_set(&zq->load, 0);
-       ap_queue_init_state(aq);
-       ap_queue_init_reply(aq, &zq->reply);
-       aq->request_timeout = CEX2A_CLEANUP_TIME;
-       dev_set_drvdata(&ap_dev->device, zq);
-       rc = zcrypt_queue_register(zq);
-       if (rc)
-               zcrypt_queue_free(zq);
-
-       return rc;
-}
-
-/*
- * This is called to remove the CEX2A queue driver information
- * if an AP queue device is removed.
- */
-static void zcrypt_cex2a_queue_remove(struct ap_device *ap_dev)
-{
-       struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
-
-       zcrypt_queue_unregister(zq);
-}
-
-static struct ap_driver zcrypt_cex2a_queue_driver = {
-       .probe = zcrypt_cex2a_queue_probe,
-       .remove = zcrypt_cex2a_queue_remove,
-       .ids = zcrypt_cex2a_queue_ids,
-       .flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-int __init zcrypt_cex2a_init(void)
-{
-       int rc;
-
-       rc = ap_driver_register(&zcrypt_cex2a_card_driver,
-                               THIS_MODULE, "cex2acard");
-       if (rc)
-               return rc;
-
-       rc = ap_driver_register(&zcrypt_cex2a_queue_driver,
-                               THIS_MODULE, "cex2aqueue");
-       if (rc)
-               ap_driver_unregister(&zcrypt_cex2a_card_driver);
-
-       return rc;
-}
-
-void __exit zcrypt_cex2a_exit(void)
-{
-       ap_driver_unregister(&zcrypt_cex2a_queue_driver);
-       ap_driver_unregister(&zcrypt_cex2a_card_driver);
-}
-
-module_init(zcrypt_cex2a_init);
-module_exit(zcrypt_cex2a_exit);
index 7842214..e69de29 100644 (file)
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- *  Copyright IBM Corp. 2001, 2006
- *  Author(s): Robert Burroughs
- *            Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-
-#ifndef _ZCRYPT_CEX2A_H_
-#define _ZCRYPT_CEX2A_H_
-
-/**
- * The type 50 message family is associated with CEXxA cards.
- *
- * The four members of the family are described below.
- *
- * Note that all unsigned char arrays are right-justified and left-padded
- * with zeroes.
- *
- * Note that all reserved fields must be zeroes.
- */
-struct type50_hdr {
-       unsigned char   reserved1;
-       unsigned char   msg_type_code;  /* 0x50 */
-       unsigned short  msg_len;
-       unsigned char   reserved2;
-       unsigned char   ignored;
-       unsigned short  reserved3;
-} __packed;
-
-#define TYPE50_TYPE_CODE       0x50
-
-#define TYPE50_MEB1_FMT                0x0001
-#define TYPE50_MEB2_FMT                0x0002
-#define TYPE50_MEB3_FMT                0x0003
-#define TYPE50_CRB1_FMT                0x0011
-#define TYPE50_CRB2_FMT                0x0012
-#define TYPE50_CRB3_FMT                0x0013
-
-/* Mod-Exp, with a small modulus */
-struct type50_meb1_msg {
-       struct type50_hdr header;
-       unsigned short  keyblock_type;  /* 0x0001 */
-       unsigned char   reserved[6];
-       unsigned char   exponent[128];
-       unsigned char   modulus[128];
-       unsigned char   message[128];
-} __packed;
-
-/* Mod-Exp, with a large modulus */
-struct type50_meb2_msg {
-       struct type50_hdr header;
-       unsigned short  keyblock_type;  /* 0x0002 */
-       unsigned char   reserved[6];
-       unsigned char   exponent[256];
-       unsigned char   modulus[256];
-       unsigned char   message[256];
-} __packed;
-
-/* Mod-Exp, with a larger modulus */
-struct type50_meb3_msg {
-       struct type50_hdr header;
-       unsigned short  keyblock_type;  /* 0x0003 */
-       unsigned char   reserved[6];
-       unsigned char   exponent[512];
-       unsigned char   modulus[512];
-       unsigned char   message[512];
-} __packed;
-
-/* CRT, with a small modulus */
-struct type50_crb1_msg {
-       struct type50_hdr header;
-       unsigned short  keyblock_type;  /* 0x0011 */
-       unsigned char   reserved[6];
-       unsigned char   p[64];
-       unsigned char   q[64];
-       unsigned char   dp[64];
-       unsigned char   dq[64];
-       unsigned char   u[64];
-       unsigned char   message[128];
-} __packed;
-
-/* CRT, with a large modulus */
-struct type50_crb2_msg {
-       struct type50_hdr header;
-       unsigned short  keyblock_type;  /* 0x0012 */
-       unsigned char   reserved[6];
-       unsigned char   p[128];
-       unsigned char   q[128];
-       unsigned char   dp[128];
-       unsigned char   dq[128];
-       unsigned char   u[128];
-       unsigned char   message[256];
-} __packed;
-
-/* CRT, with a larger modulus */
-struct type50_crb3_msg {
-       struct type50_hdr header;
-       unsigned short  keyblock_type;  /* 0x0013 */
-       unsigned char   reserved[6];
-       unsigned char   p[256];
-       unsigned char   q[256];
-       unsigned char   dp[256];
-       unsigned char   dq[256];
-       unsigned char   u[256];
-       unsigned char   message[512];
-} __packed;
-
-/**
- * The type 80 response family is associated with CEXxA cards.
- *
- * Note that all unsigned char arrays are right-justified and left-padded
- * with zeroes.
- *
- * Note that all reserved fields must be zeroes.
- */
-
-#define TYPE80_RSP_CODE 0x80
-
-struct type80_hdr {
-       unsigned char   reserved1;
-       unsigned char   type;           /* 0x80 */
-       unsigned short  len;
-       unsigned char   code;           /* 0x00 */
-       unsigned char   reserved2[3];
-       unsigned char   reserved3[8];
-} __packed;
-
-int zcrypt_cex2a_init(void);
-void zcrypt_cex2a_exit(void);
-
-#endif /* _ZCRYPT_CEX2A_H_ */
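
The struct sizes in the deleted header above are worth pinning down, because they reappear as the MSGTYPE50_CRB2/CRB3_MAX_MSG_SIZE constants (0x390 and 0x710) further down in this diff. A standalone C11 sketch that re-declares the two CRT structs and checks the arithmetic at compile time:

    /* Reduced copies of the deleted type50 structs, just to check sizes. */
    struct type50_hdr {
            unsigned char  reserved1;
            unsigned char  msg_type_code;   /* 0x50 */
            unsigned short msg_len;
            unsigned char  reserved2;
            unsigned char  ignored;
            unsigned short reserved3;
    } __attribute__((packed));

    struct type50_crb2_msg {
            struct type50_hdr header;
            unsigned short keyblock_type;   /* 0x0012 */
            unsigned char  reserved[6];
            unsigned char  p[128], q[128], dp[128], dq[128], u[128];
            unsigned char  message[256];
    } __attribute__((packed));

    struct type50_crb3_msg {
            struct type50_hdr header;
            unsigned short keyblock_type;   /* 0x0013 */
            unsigned char  reserved[6];
            unsigned char  p[256], q[256], dp[256], dq[256], u[256];
            unsigned char  message[512];
    } __attribute__((packed));

    /* 8 + 2 + 6 + 5*128 + 256 = 0x390; 8 + 2 + 6 + 5*256 + 512 = 0x710 */
    _Static_assert(sizeof(struct type50_crb2_msg) == 0x390, "CRB2 size");
    _Static_assert(sizeof(struct type50_crb3_msg) == 0x710, "CRB3 size");

    int main(void) { return 0; }
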
index 251b5bd..e69de29 100644 (file)
@@ -1,421 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- *  Copyright IBM Corp. 2001, 2018
- *  Author(s): Robert Burroughs
- *            Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *                               Ralph Wuerthner <rwuerthn@de.ibm.com>
- *  MSGTYPE restruct:            Holger Dengler <hd@linux.vnet.ibm.com>
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/err.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/mod_devicetable.h>
-
-#include "ap_bus.h"
-#include "zcrypt_api.h"
-#include "zcrypt_error.h"
-#include "zcrypt_msgtype6.h"
-#include "zcrypt_cex2c.h"
-#include "zcrypt_cca_key.h"
-#include "zcrypt_ccamisc.h"
-
-#define CEX2C_MIN_MOD_SIZE      16     /*  128 bits    */
-#define CEX2C_MAX_MOD_SIZE     256     /* 2048 bits    */
-#define CEX3C_MIN_MOD_SIZE      16     /*  128 bits    */
-#define CEX3C_MAX_MOD_SIZE     512     /* 4096 bits    */
-#define CEX2C_MAX_XCRB_MESSAGE_SIZE (12 * 1024)
-#define CEX2C_CLEANUP_TIME     (15 * HZ)
-
-MODULE_AUTHOR("IBM Corporation");
-MODULE_DESCRIPTION("CEX2C/CEX3C Cryptographic Coprocessor device driver, " \
-                  "Copyright IBM Corp. 2001, 2018");
-MODULE_LICENSE("GPL");
-
-static struct ap_device_id zcrypt_cex2c_card_ids[] = {
-       { .dev_type = AP_DEVICE_TYPE_CEX2C,
-         .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-       { .dev_type = AP_DEVICE_TYPE_CEX3C,
-         .match_flags = AP_DEVICE_ID_MATCH_CARD_TYPE },
-       { /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_card_ids);
-
-static struct ap_device_id zcrypt_cex2c_queue_ids[] = {
-       { .dev_type = AP_DEVICE_TYPE_CEX2C,
-         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-       { .dev_type = AP_DEVICE_TYPE_CEX3C,
-         .match_flags = AP_DEVICE_ID_MATCH_QUEUE_TYPE },
-       { /* end of list */ },
-};
-
-MODULE_DEVICE_TABLE(ap, zcrypt_cex2c_queue_ids);
-
-/*
- * CCA card additional device attributes
- */
-static ssize_t cca_serialnr_show(struct device *dev,
-                                struct device_attribute *attr,
-                                char *buf)
-{
-       struct zcrypt_card *zc = dev_get_drvdata(dev);
-       struct cca_info ci;
-       struct ap_card *ac = to_ap_card(dev);
-
-       memset(&ci, 0, sizeof(ci));
-
-       if (ap_domain_index >= 0)
-               cca_get_info(ac->id, ap_domain_index, &ci, zc->online);
-
-       return sysfs_emit(buf, "%s\n", ci.serial);
-}
-
-static struct device_attribute dev_attr_cca_serialnr =
-       __ATTR(serialnr, 0444, cca_serialnr_show, NULL);
-
-static struct attribute *cca_card_attrs[] = {
-       &dev_attr_cca_serialnr.attr,
-       NULL,
-};
-
-static const struct attribute_group cca_card_attr_grp = {
-       .attrs = cca_card_attrs,
-};
-
- /*
-  * CCA queue additional device attributes
-  */
-static ssize_t cca_mkvps_show(struct device *dev,
-                             struct device_attribute *attr,
-                             char *buf)
-{
-       struct zcrypt_queue *zq = dev_get_drvdata(dev);
-       int n = 0;
-       struct cca_info ci;
-       static const char * const cao_state[] = { "invalid", "valid" };
-       static const char * const new_state[] = { "empty", "partial", "full" };
-
-       memset(&ci, 0, sizeof(ci));
-
-       cca_get_info(AP_QID_CARD(zq->queue->qid),
-                    AP_QID_QUEUE(zq->queue->qid),
-                    &ci, zq->online);
-
-       if (ci.new_aes_mk_state >= '1' && ci.new_aes_mk_state <= '3')
-               n = sysfs_emit(buf, "AES NEW: %s 0x%016llx\n",
-                              new_state[ci.new_aes_mk_state - '1'],
-                              ci.new_aes_mkvp);
-       else
-               n = sysfs_emit(buf, "AES NEW: - -\n");
-
-       if (ci.cur_aes_mk_state >= '1' && ci.cur_aes_mk_state <= '2')
-               n += sysfs_emit_at(buf, n, "AES CUR: %s 0x%016llx\n",
-                                  cao_state[ci.cur_aes_mk_state - '1'],
-                                  ci.cur_aes_mkvp);
-       else
-               n += sysfs_emit_at(buf, n, "AES CUR: - -\n");
-
-       if (ci.old_aes_mk_state >= '1' && ci.old_aes_mk_state <= '2')
-               n += sysfs_emit_at(buf, n, "AES OLD: %s 0x%016llx\n",
-                                  cao_state[ci.old_aes_mk_state - '1'],
-                                  ci.old_aes_mkvp);
-       else
-               n += sysfs_emit_at(buf, n, "AES OLD: - -\n");
-
-       if (ci.new_apka_mk_state >= '1' && ci.new_apka_mk_state <= '3')
-               n += sysfs_emit_at(buf, n, "APKA NEW: %s 0x%016llx\n",
-                                  new_state[ci.new_apka_mk_state - '1'],
-                                  ci.new_apka_mkvp);
-       else
-               n += sysfs_emit_at(buf, n, "APKA NEW: - -\n");
-
-       if (ci.cur_apka_mk_state >= '1' && ci.cur_apka_mk_state <= '2')
-               n += sysfs_emit_at(buf, n, "APKA CUR: %s 0x%016llx\n",
-                                  cao_state[ci.cur_apka_mk_state - '1'],
-                                  ci.cur_apka_mkvp);
-       else
-               n += sysfs_emit_at(buf, n, "APKA CUR: - -\n");
-
-       if (ci.old_apka_mk_state >= '1' && ci.old_apka_mk_state <= '2')
-               n += sysfs_emit_at(buf, n, "APKA OLD: %s 0x%016llx\n",
-                                  cao_state[ci.old_apka_mk_state - '1'],
-                                  ci.old_apka_mkvp);
-       else
-               n += sysfs_emit_at(buf, n, "APKA OLD: - -\n");
-
-       return n;
-}
-
-static struct device_attribute dev_attr_cca_mkvps =
-       __ATTR(mkvps, 0444, cca_mkvps_show, NULL);
-
-static struct attribute *cca_queue_attrs[] = {
-       &dev_attr_cca_mkvps.attr,
-       NULL,
-};
-
-static const struct attribute_group cca_queue_attr_grp = {
-       .attrs = cca_queue_attrs,
-};
-
-/*
- * Large random number detection function. It sends a message to a CEX2C/CEX3C
- * card to find out if large random numbers are supported.
- * @ap_dev: pointer to the AP device.
- *
- * Returns 1 if large random numbers are supported, 0 if not and < 0 on error.
- */
-static int zcrypt_cex2c_rng_supported(struct ap_queue *aq)
-{
-       struct ap_message ap_msg;
-       unsigned long psmid;
-       unsigned int domain;
-       struct {
-               struct type86_hdr hdr;
-               struct type86_fmt2_ext fmt2;
-               struct CPRBX cprbx;
-       } __packed *reply;
-       struct {
-               struct type6_hdr hdr;
-               struct CPRBX cprbx;
-               char function_code[2];
-               short int rule_length;
-               char rule[8];
-               short int verb_length;
-               short int key_length;
-       } __packed *msg;
-       int rc, i;
-
-       ap_init_message(&ap_msg);
-       ap_msg.msg = (void *)get_zeroed_page(GFP_KERNEL);
-       if (!ap_msg.msg)
-               return -ENOMEM;
-       ap_msg.bufsize = PAGE_SIZE;
-
-       rng_type6cprb_msgx(&ap_msg, 4, &domain);
-
-       msg = ap_msg.msg;
-       msg->cprbx.domain = AP_QID_QUEUE(aq->qid);
-
-       rc = ap_send(aq->qid, 0x0102030405060708UL, ap_msg.msg, ap_msg.len);
-       if (rc)
-               goto out_free;
-
-       /* Wait for the test message to complete. */
-       for (i = 0; i < 2 * HZ; i++) {
-               msleep(1000 / HZ);
-               rc = ap_recv(aq->qid, &psmid, ap_msg.msg, ap_msg.bufsize);
-               if (rc == 0 && psmid == 0x0102030405060708UL)
-                       break;
-       }
-
-       if (i >= 2 * HZ) {
-               /* Got no answer. */
-               rc = -ENODEV;
-               goto out_free;
-       }
-
-       reply = ap_msg.msg;
-       if (reply->cprbx.ccp_rtcode == 0 && reply->cprbx.ccp_rscode == 0)
-               rc = 1;
-       else
-               rc = 0;
-out_free:
-       free_page((unsigned long)ap_msg.msg);
-       return rc;
-}
-
-/*
- * Probe function for CEX2C/CEX3C card devices. It always accepts the
- * AP device since the bus_match already checked the hardware type.
- * @ap_dev: pointer to the AP card device.
- */
-static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
-{
-       /*
-        * Normalized speed ratings per crypto adapter
-        * MEX_1k, MEX_2k, MEX_4k, CRT_1k, CRT_2k, CRT_4k, RNG, SECKEY
-        */
-       static const int CEX2C_SPEED_IDX[] = {
-               1000, 1400, 2400, 1100, 1500, 2600, 100, 12};
-       static const int CEX3C_SPEED_IDX[] = {
-               500,  700, 1400,  550,  800, 1500,  80, 10};
-
-       struct ap_card *ac = to_ap_card(&ap_dev->device);
-       struct zcrypt_card *zc;
-       int rc = 0;
-
-       zc = zcrypt_card_alloc();
-       if (!zc)
-               return -ENOMEM;
-       zc->card = ac;
-       dev_set_drvdata(&ap_dev->device, zc);
-       switch (ac->ap_dev.device_type) {
-       case AP_DEVICE_TYPE_CEX2C:
-               zc->user_space_type = ZCRYPT_CEX2C;
-               zc->type_string = "CEX2C";
-               zc->speed_rating = CEX2C_SPEED_IDX;
-               zc->min_mod_size = CEX2C_MIN_MOD_SIZE;
-               zc->max_mod_size = CEX2C_MAX_MOD_SIZE;
-               zc->max_exp_bit_length = CEX2C_MAX_MOD_SIZE;
-               break;
-       case AP_DEVICE_TYPE_CEX3C:
-               zc->user_space_type = ZCRYPT_CEX3C;
-               zc->type_string = "CEX3C";
-               zc->speed_rating = CEX3C_SPEED_IDX;
-               zc->min_mod_size = CEX3C_MIN_MOD_SIZE;
-               zc->max_mod_size = CEX3C_MAX_MOD_SIZE;
-               zc->max_exp_bit_length = CEX3C_MAX_MOD_SIZE;
-               break;
-       default:
-               zcrypt_card_free(zc);
-               return -ENODEV;
-       }
-       zc->online = 1;
-
-       rc = zcrypt_card_register(zc);
-       if (rc) {
-               zcrypt_card_free(zc);
-               return rc;
-       }
-
-       if (ap_test_bit(&ac->functions, AP_FUNC_COPRO)) {
-               rc = sysfs_create_group(&ap_dev->device.kobj,
-                                       &cca_card_attr_grp);
-               if (rc) {
-                       zcrypt_card_unregister(zc);
-                       zcrypt_card_free(zc);
-               }
-       }
-
-       return rc;
-}
-
-/*
- * This is called to remove the CEX2C/CEX3C card driver information
- * if an AP card device is removed.
- */
-static void zcrypt_cex2c_card_remove(struct ap_device *ap_dev)
-{
-       struct zcrypt_card *zc = dev_get_drvdata(&ap_dev->device);
-       struct ap_card *ac = to_ap_card(&ap_dev->device);
-
-       if (ap_test_bit(&ac->functions, AP_FUNC_COPRO))
-               sysfs_remove_group(&ap_dev->device.kobj, &cca_card_attr_grp);
-
-       zcrypt_card_unregister(zc);
-}
-
-static struct ap_driver zcrypt_cex2c_card_driver = {
-       .probe = zcrypt_cex2c_card_probe,
-       .remove = zcrypt_cex2c_card_remove,
-       .ids = zcrypt_cex2c_card_ids,
-       .flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-/*
- * Probe function for CEX2C/CEX3C queue devices. It always accepts the
- * AP device since the bus_match already checked the hardware type.
- * @ap_dev: pointer to the AP queue device.
- */
-static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
-{
-       struct ap_queue *aq = to_ap_queue(&ap_dev->device);
-       struct zcrypt_queue *zq;
-       int rc;
-
-       zq = zcrypt_queue_alloc(CEX2C_MAX_XCRB_MESSAGE_SIZE);
-       if (!zq)
-               return -ENOMEM;
-       zq->queue = aq;
-       zq->online = 1;
-       atomic_set(&zq->load, 0);
-       ap_rapq(aq->qid, 0);
-       rc = zcrypt_cex2c_rng_supported(aq);
-       if (rc < 0) {
-               zcrypt_queue_free(zq);
-               return rc;
-       }
-       if (rc)
-               zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-                                        MSGTYPE06_VARIANT_DEFAULT);
-       else
-               zq->ops = zcrypt_msgtype(MSGTYPE06_NAME,
-                                        MSGTYPE06_VARIANT_NORNG);
-       ap_queue_init_state(aq);
-       ap_queue_init_reply(aq, &zq->reply);
-       aq->request_timeout = CEX2C_CLEANUP_TIME;
-       dev_set_drvdata(&ap_dev->device, zq);
-       rc = zcrypt_queue_register(zq);
-       if (rc) {
-               zcrypt_queue_free(zq);
-               return rc;
-       }
-
-       if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO)) {
-               rc = sysfs_create_group(&ap_dev->device.kobj,
-                                       &cca_queue_attr_grp);
-               if (rc) {
-                       zcrypt_queue_unregister(zq);
-                       zcrypt_queue_free(zq);
-               }
-       }
-
-       return rc;
-}
-
-/*
- * This is called to remove the CEX2C/CEX3C queue driver information
- * if an AP queue device is removed.
- */
-static void zcrypt_cex2c_queue_remove(struct ap_device *ap_dev)
-{
-       struct zcrypt_queue *zq = dev_get_drvdata(&ap_dev->device);
-       struct ap_queue *aq = to_ap_queue(&ap_dev->device);
-
-       if (ap_test_bit(&aq->card->functions, AP_FUNC_COPRO))
-               sysfs_remove_group(&ap_dev->device.kobj, &cca_queue_attr_grp);
-
-       zcrypt_queue_unregister(zq);
-}
-
-static struct ap_driver zcrypt_cex2c_queue_driver = {
-       .probe = zcrypt_cex2c_queue_probe,
-       .remove = zcrypt_cex2c_queue_remove,
-       .ids = zcrypt_cex2c_queue_ids,
-       .flags = AP_DRIVER_FLAG_DEFAULT,
-};
-
-int __init zcrypt_cex2c_init(void)
-{
-       int rc;
-
-       rc = ap_driver_register(&zcrypt_cex2c_card_driver,
-                               THIS_MODULE, "cex2card");
-       if (rc)
-               return rc;
-
-       rc = ap_driver_register(&zcrypt_cex2c_queue_driver,
-                               THIS_MODULE, "cex2cqueue");
-       if (rc)
-               ap_driver_unregister(&zcrypt_cex2c_card_driver);
-
-       return rc;
-}
-
-void zcrypt_cex2c_exit(void)
-{
-       ap_driver_unregister(&zcrypt_cex2c_queue_driver);
-       ap_driver_unregister(&zcrypt_cex2c_card_driver);
-}
-
-module_init(zcrypt_cex2c_init);
-module_exit(zcrypt_cex2c_exit);
index 6ec405c..e69de29 100644 (file)
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- *  Copyright IBM Corp. 2001, 2018
- *  Author(s): Robert Burroughs
- *            Eric Rossman (edrossma@us.ibm.com)
- *
- *  Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com)
- *  Major cleanup & driver split: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *  MSGTYPE restruct:            Holger Dengler <hd@linux.vnet.ibm.com>
- */
-
-#ifndef _ZCRYPT_CEX2C_H_
-#define _ZCRYPT_CEX2C_H_
-
-int zcrypt_cex2c_init(void);
-void zcrypt_cex2c_exit(void);
-
-#endif /* _ZCRYPT_CEX2C_H_ */
index 958f5ee..0a877f9 100644 (file)
@@ -29,6 +29,8 @@
 #define DEBUG_WARN(...) ZCRYPT_DBF(DBF_WARN, ##__VA_ARGS__)
 #define DEBUG_ERR(...) ZCRYPT_DBF(DBF_ERR, ##__VA_ARGS__)
 
+#define EP11_PINBLOB_V1_BYTES 56
+
 /* default iv used here */
 static const u8 def_iv[16] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
                               0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff };
@@ -113,6 +115,109 @@ static void __exit card_cache_free(void)
        spin_unlock_bh(&card_list_lock);
 }
 
+static int ep11_kb_split(const u8 *kb, size_t kblen, u32 kbver,
+                        struct ep11kblob_header **kbhdr, size_t *kbhdrsize,
+                        u8 **kbpl, size_t *kbplsize)
+{
+       struct ep11kblob_header *hdr = NULL;
+       size_t hdrsize, plsize = 0;
+       int rc = -EINVAL;
+       u8 *pl = NULL;
+
+       if (kblen < sizeof(struct ep11kblob_header))
+               goto out;
+       hdr = (struct ep11kblob_header *)kb;
+
+       switch (kbver) {
+       case TOKVER_EP11_AES:
+               /* header overlays the payload */
+               hdrsize = 0;
+               break;
+       case TOKVER_EP11_ECC_WITH_HEADER:
+       case TOKVER_EP11_AES_WITH_HEADER:
+               /* payload starts after the header */
+               hdrsize = sizeof(struct ep11kblob_header);
+               break;
+       default:
+               goto out;
+       }
+
+       plsize = kblen - hdrsize;
+       pl = (u8 *)kb + hdrsize;
+
+       if (kbhdr)
+               *kbhdr = hdr;
+       if (kbhdrsize)
+               *kbhdrsize = hdrsize;
+       if (kbpl)
+               *kbpl = pl;
+       if (kbplsize)
+               *kbplsize = plsize;
+
+       rc = 0;
+out:
+       return rc;
+}
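
ep11_kb_split() distinguishes exactly two blob layouts: for TOKVER_EP11_AES the header overlays the start of the payload (header size 0), while the *_WITH_HEADER versions carry a real header in front of it. A standalone sketch of that offset arithmetic, assuming the 8-byte header layout shown in the removed inline struct in zcrypt_ep11misc.h:

    #include <stdio.h>
    #include <stddef.h>

    /* Assumed to match struct ep11kblob_header (8 bytes, see the .h diff). */
    struct hdr_sketch {
            unsigned char  type, res0;
            unsigned short len;
            unsigned char  version, res1;
            unsigned short bitlen;
    };

    int main(void)
    {
            size_t kblen = 240;     /* made-up total blob length */

            /* TOKVER_EP11_AES: header overlays the payload */
            printf("overlay:     payload %zu bytes at offset 0\n", kblen);

            /* TOKVER_EP11_{AES,ECC}_WITH_HEADER: payload follows the header */
            printf("with header: payload %zu bytes at offset %zu\n",
                   kblen - sizeof(struct hdr_sketch),
                   sizeof(struct hdr_sketch));
            return 0;
    }
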
+
+static int ep11_kb_decode(const u8 *kb, size_t kblen,
+                         struct ep11kblob_header **kbhdr, size_t *kbhdrsize,
+                         struct ep11keyblob **kbpl, size_t *kbplsize)
+{
+       struct ep11kblob_header *tmph, *hdr = NULL;
+       size_t hdrsize = 0, plsize = 0;
+       struct ep11keyblob *pl = NULL;
+       int rc = -EINVAL;
+       u8 *tmpp;
+
+       if (kblen < sizeof(struct ep11kblob_header))
+               goto out;
+       tmph = (struct ep11kblob_header *)kb;
+
+       if (tmph->type != TOKTYPE_NON_CCA &&
+           tmph->len > kblen)
+               goto out;
+
+       if (ep11_kb_split(kb, kblen, tmph->version,
+                         &hdr, &hdrsize, &tmpp, &plsize))
+               goto out;
+
+       if (plsize < sizeof(struct ep11keyblob))
+               goto out;
+
+       if (!is_ep11_keyblob(tmpp))
+               goto out;
+
+       pl = (struct ep11keyblob *)tmpp;
+       plsize = hdr->len - hdrsize;
+
+       if (kbhdr)
+               *kbhdr = hdr;
+       if (kbhdrsize)
+               *kbhdrsize = hdrsize;
+       if (kbpl)
+               *kbpl = pl;
+       if (kbplsize)
+               *kbplsize = plsize;
+
+       rc = 0;
+out:
+       return rc;
+}
+
+/*
+ * For valid ep11 keyblobs, returns a reference to the wrapping key verification
+ * pattern. Otherwise NULL.
+ */
+const u8 *ep11_kb_wkvp(const u8 *keyblob, size_t keybloblen)
+{
+       struct ep11keyblob *kb;
+
+       if (ep11_kb_decode(keyblob, keybloblen, NULL, NULL, &kb, NULL))
+               return NULL;
+       return kb->wkvp;
+}
+EXPORT_SYMBOL(ep11_kb_wkvp);
+
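
A fragment-style usage sketch for the new export (not part of this patch): comparing the 16-byte wkvp of two blobs tells whether they are wrapped by the same master key. Kernel context is assumed (u8, bool, memcmp from <linux/string.h>); the blob buffers and lengths are hypothetical inputs:

    /* Sketch only: kb1/kb2 and their lengths come from the caller. */
    static bool same_wrapping_key(const u8 *kb1, size_t kb1len,
                                  const u8 *kb2, size_t kb2len)
    {
            const u8 *w1 = ep11_kb_wkvp(kb1, kb1len);
            const u8 *w2 = ep11_kb_wkvp(kb2, kb2len);

            /* wkvp is the 16-byte wrapping key verification pattern */
            return w1 && w2 && memcmp(w1, w2, 16) == 0;
    }
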
 /*
  * Simple check if the key blob is a valid EP11 AES key blob with header.
  */
@@ -489,7 +594,7 @@ static int ep11_query_info(u16 cardnr, u16 domain, u32 query_type,
        struct ep11_cprb *req = NULL, *rep = NULL;
        struct ep11_target_dev target;
        struct ep11_urb *urb = NULL;
-       int api = 1, rc = -ENOMEM;
+       int api = EP11_API_V1, rc = -ENOMEM;
 
        /* request cprb and payload */
        req = alloc_cprb(sizeof(struct ep11_info_req_pl));
@@ -664,8 +769,9 @@ EXPORT_SYMBOL(ep11_get_domain_info);
  */
 #define KEY_ATTR_DEFAULTS 0x00200c00
 
-int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
-                  u8 *keybuf, size_t *keybufsize)
+static int _ep11_genaeskey(u16 card, u16 domain,
+                          u32 keybitsize, u32 keygenflags,
+                          u8 *keybuf, size_t *keybufsize)
 {
        struct keygen_req_pl {
                struct pl_head head;
@@ -685,8 +791,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
                u32 attr_bool_bits;
                u32 attr_val_len_type;
                u32 attr_val_len_value;
-               u8  pin_tag;
-               u8  pin_len;
+               /* followed by empty pin tag or empty pinblob tag */
        } __packed * req_pl;
        struct keygen_rep_pl {
                struct pl_head head;
@@ -699,10 +804,11 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
                u8  data[512];
        } __packed * rep_pl;
        struct ep11_cprb *req = NULL, *rep = NULL;
+       size_t req_pl_size, pinblob_size = 0;
        struct ep11_target_dev target;
        struct ep11_urb *urb = NULL;
-       struct ep11keyblob *kb;
        int api, rc = -ENOMEM;
+       u8 *p;
 
        switch (keybitsize) {
        case 128:
@@ -718,12 +824,22 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        }
 
        /* request cprb and payload */
-       req = alloc_cprb(sizeof(struct keygen_req_pl));
+       api = (!keygenflags || keygenflags & 0x00200000) ?
+               EP11_API_V4 : EP11_API_V1;
+       if (ap_is_se_guest()) {
+               /*
+                * genkey within SE environment requires API ordinal 6
+                * with empty pinblob
+                */
+               api = EP11_API_V6;
+               pinblob_size = EP11_PINBLOB_V1_BYTES;
+       }
+       req_pl_size = sizeof(struct keygen_req_pl) + ASN1TAGLEN(pinblob_size);
+       req = alloc_cprb(req_pl_size);
        if (!req)
                goto out;
        req_pl = (struct keygen_req_pl *)(((u8 *)req) + sizeof(*req));
-       api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1;
-       prep_head(&req_pl->head, sizeof(*req_pl), api, 21); /* GenerateKey */
+       prep_head(&req_pl->head, req_pl_size, api, 21); /* GenerateKey */
        req_pl->var_tag = 0x04;
        req_pl->var_len = sizeof(u32);
        req_pl->keybytes_tag = 0x04;
@@ -739,7 +855,10 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        req_pl->attr_bool_bits = keygenflags ? keygenflags : KEY_ATTR_DEFAULTS;
        req_pl->attr_val_len_type = 0x00000161; /* CKA_VALUE_LEN */
        req_pl->attr_val_len_value = keybitsize / 8;
-       req_pl->pin_tag = 0x04;
+       p = ((u8 *)req_pl) + sizeof(*req_pl);
+       /* pin tag */
+       *p++ = 0x04;
+       *p++ = pinblob_size;
 
        /* reply cprb and payload */
        rep = alloc_cprb(sizeof(struct keygen_rep_pl));
@@ -754,7 +873,7 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        target.ap_id = card;
        target.dom_id = domain;
        prep_urb(urb, &target, 1,
-                req, sizeof(*req) + sizeof(*req_pl),
+                req, sizeof(*req) + req_pl_size,
                 rep, sizeof(*rep) + sizeof(*rep_pl));
 
        rc = zcrypt_send_ep11_cprb(urb);
@@ -780,14 +899,9 @@ int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
                goto out;
        }
 
-       /* copy key blob and set header values */
+       /* copy key blob */
        memcpy(keybuf, rep_pl->data, rep_pl->data_len);
        *keybufsize = rep_pl->data_len;
-       kb = (struct ep11keyblob *)keybuf;
-       kb->head.type = TOKTYPE_NON_CCA;
-       kb->head.len = rep_pl->data_len;
-       kb->head.version = TOKVER_EP11_AES;
-       kb->head.keybitlen = keybitsize;
 
 out:
        kfree(req);
@@ -795,6 +909,43 @@ out:
        kfree(urb);
        return rc;
 }
+
+int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
+                  u8 *keybuf, size_t *keybufsize, u32 keybufver)
+{
+       struct ep11kblob_header *hdr;
+       size_t hdr_size, pl_size;
+       u8 *pl;
+       int rc;
+
+       switch (keybufver) {
+       case TOKVER_EP11_AES:
+       case TOKVER_EP11_AES_WITH_HEADER:
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       rc = ep11_kb_split(keybuf, *keybufsize, keybufver,
+                          &hdr, &hdr_size, &pl, &pl_size);
+       if (rc)
+               return rc;
+
+       rc = _ep11_genaeskey(card, domain, keybitsize, keygenflags,
+                            pl, &pl_size);
+       if (rc)
+               return rc;
+
+       *keybufsize = hdr_size + pl_size;
+
+       /* update header information */
+       hdr->type = TOKTYPE_NON_CCA;
+       hdr->len = *keybufsize;
+       hdr->version = keybufver;
+       hdr->bitlen = keybitsize;
+
+       return 0;
+}
 EXPORT_SYMBOL(ep11_genaeskey);
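
The pin-tag handling above reduces to two tiny ASN.1 encodings: a zero-length pin tag for the non-SE case and an all-zero 56-byte pinblob for the SE case (API ordinal 6). A standalone sketch of just those bytes, assuming short-form ASN.1 lengths, i.e. one tag byte plus one length byte plus payload:

    #include <stdio.h>
    #include <string.h>

    #define EP11_PINBLOB_V1_BYTES 56

    /* Write tag 0x04 with n zero payload bytes (n < 128), mirroring the
     * "*p++ = 0x04; *p++ = pinblob_size;" lines in the code above. */
    static size_t put_pin_tag(unsigned char *p, size_t n)
    {
            p[0] = 0x04;
            p[1] = (unsigned char)n;
            memset(p + 2, 0, n);            /* empty (all-zero) pinblob */
            return 2 + n;
    }

    int main(void)
    {
            unsigned char buf[2 + EP11_PINBLOB_V1_BYTES];

            /* non-SE guest: empty pin tag, 2 bytes total */
            printf("empty pin tag: %zu bytes\n", put_pin_tag(buf, 0));
            /* SE guest: empty 56-byte pinblob, 58 bytes total */
            printf("empty pinblob: %zu bytes\n",
                   put_pin_tag(buf, EP11_PINBLOB_V1_BYTES));
            return 0;
    }
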
 
 static int ep11_cryptsingle(u16 card, u16 domain,
@@ -830,7 +981,7 @@ static int ep11_cryptsingle(u16 card, u16 domain,
        struct ep11_target_dev target;
        struct ep11_urb *urb = NULL;
        size_t req_pl_size, rep_pl_size;
-       int n, api = 1, rc = -ENOMEM;
+       int n, api = EP11_API_V1, rc = -ENOMEM;
        u8 *p;
 
        /* the simple asn1 coding used has length limits */
@@ -924,12 +1075,12 @@ out:
        return rc;
 }
 
-static int ep11_unwrapkey(u16 card, u16 domain,
-                         const u8 *kek, size_t keksize,
-                         const u8 *enckey, size_t enckeysize,
-                         u32 mech, const u8 *iv,
-                         u32 keybitsize, u32 keygenflags,
-                         u8 *keybuf, size_t *keybufsize)
+static int _ep11_unwrapkey(u16 card, u16 domain,
+                          const u8 *kek, size_t keksize,
+                          const u8 *enckey, size_t enckeysize,
+                          u32 mech, const u8 *iv,
+                          u32 keybitsize, u32 keygenflags,
+                          u8 *keybuf, size_t *keybufsize)
 {
        struct uw_req_pl {
                struct pl_head head;
@@ -949,7 +1100,7 @@ static int ep11_unwrapkey(u16 card, u16 domain,
                 * maybe followed by iv data
                 * followed by kek tag + kek blob
                 * followed by empty mac tag
-                * followed by empty pin tag
+                * followed by empty pin tag or empty pinblob tag
                 * followed by encrypted key tag + bytes
                 */
        } __packed * req_pl;
@@ -964,21 +1115,30 @@ static int ep11_unwrapkey(u16 card, u16 domain,
                u8  data[512];
        } __packed * rep_pl;
        struct ep11_cprb *req = NULL, *rep = NULL;
+       size_t req_pl_size, pinblob_size = 0;
        struct ep11_target_dev target;
        struct ep11_urb *urb = NULL;
-       struct ep11keyblob *kb;
-       size_t req_pl_size;
        int api, rc = -ENOMEM;
        u8 *p;
 
        /* request cprb and payload */
+       api = (!keygenflags || keygenflags & 0x00200000) ?
+               EP11_API_V4 : EP11_API_V1;
+       if (ap_is_se_guest()) {
+               /*
+                * unwrap within SE environment requires API ordinal 6
+                * with empty pinblob
+                */
+               api = EP11_API_V6;
+               pinblob_size = EP11_PINBLOB_V1_BYTES;
+       }
        req_pl_size = sizeof(struct uw_req_pl) + (iv ? 16 : 0)
-               + ASN1TAGLEN(keksize) + 4 + ASN1TAGLEN(enckeysize);
+               + ASN1TAGLEN(keksize) + ASN1TAGLEN(0)
+               + ASN1TAGLEN(pinblob_size) + ASN1TAGLEN(enckeysize);
        req = alloc_cprb(req_pl_size);
        if (!req)
                goto out;
        req_pl = (struct uw_req_pl *)(((u8 *)req) + sizeof(*req));
-       api = (!keygenflags || keygenflags & 0x00200000) ? 4 : 1;
        prep_head(&req_pl->head, req_pl_size, api, 34); /* UnwrapKey */
        req_pl->attr_tag = 0x04;
        req_pl->attr_len = 7 * sizeof(u32);
@@ -1003,9 +1163,10 @@ static int ep11_unwrapkey(u16 card, u16 domain,
        /* empty mac key tag */
        *p++ = 0x04;
        *p++ = 0;
-       /* empty pin tag */
+       /* pin tag */
        *p++ = 0x04;
-       *p++ = 0;
+       *p++ = pinblob_size;
+       p += pinblob_size;
        /* encrypted key value tag and bytes */
        p += asn1tag_write(p, 0x04, enckey, enckeysize);
 
@@ -1048,14 +1209,9 @@ static int ep11_unwrapkey(u16 card, u16 domain,
                goto out;
        }
 
-       /* copy key blob and set header values */
+       /* copy key blob */
        memcpy(keybuf, rep_pl->data, rep_pl->data_len);
        *keybufsize = rep_pl->data_len;
-       kb = (struct ep11keyblob *)keybuf;
-       kb->head.type = TOKTYPE_NON_CCA;
-       kb->head.len = rep_pl->data_len;
-       kb->head.version = TOKVER_EP11_AES;
-       kb->head.keybitlen = keybitsize;
 
 out:
        kfree(req);
@@ -1064,10 +1220,46 @@ out:
        return rc;
 }
 
-static int ep11_wrapkey(u16 card, u16 domain,
-                       const u8 *key, size_t keysize,
-                       u32 mech, const u8 *iv,
-                       u8 *databuf, size_t *datasize)
+static int ep11_unwrapkey(u16 card, u16 domain,
+                         const u8 *kek, size_t keksize,
+                         const u8 *enckey, size_t enckeysize,
+                         u32 mech, const u8 *iv,
+                         u32 keybitsize, u32 keygenflags,
+                         u8 *keybuf, size_t *keybufsize,
+                         u8 keybufver)
+{
+       struct ep11kblob_header *hdr;
+       size_t hdr_size, pl_size;
+       u8 *pl;
+       int rc;
+
+       rc = ep11_kb_split(keybuf, *keybufsize, keybufver,
+                          &hdr, &hdr_size, &pl, &pl_size);
+       if (rc)
+               return rc;
+
+       rc = _ep11_unwrapkey(card, domain, kek, keksize, enckey, enckeysize,
+                            mech, iv, keybitsize, keygenflags,
+                            pl, &pl_size);
+       if (rc)
+               return rc;
+
+       *keybufsize = hdr_size + pl_size;
+
+       /* update header information */
+       hdr = (struct ep11kblob_header *)keybuf;
+       hdr->type = TOKTYPE_NON_CCA;
+       hdr->len = *keybufsize;
+       hdr->version = keybufver;
+       hdr->bitlen = keybitsize;
+
+       return 0;
+}
+
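
Both new wrappers, ep11_genaeskey() and the ep11_unwrapkey() above, end with the same bookkeeping: the payload is regenerated by the _ep11_* helper and the (possibly overlaid) header is then refreshed. A standalone sketch of that step, using the 8-byte layout from the removed inline head struct and assuming struct ep11kblob_header matches it:

    #include <stdint.h>
    #include <stdio.h>

    #define TOKTYPE_NON_CCA  0x00   /* per the removed head comment */
    #define TOKVER_EP11_AES  0x03   /* per the removed head comment */

    struct hdr_sketch {
            uint8_t  type;     /* 0x00 (TOKTYPE_NON_CCA) */
            uint8_t  res0;
            uint16_t len;      /* total blob length in bytes */
            uint8_t  version;  /* e.g. 0x03 (TOKVER_EP11_AES) */
            uint8_t  res1;
            uint16_t bitlen;   /* clear key bit length, 0 if unknown */
    };

    int main(void)
    {
            struct hdr_sketch hdr = { 0 };
            size_t hdr_size = sizeof(hdr), pl_size = 224; /* made up */

            /* mirror the "update header information" step above */
            hdr.type    = TOKTYPE_NON_CCA;
            hdr.len     = (uint16_t)(hdr_size + pl_size);
            hdr.version = TOKVER_EP11_AES;
            hdr.bitlen  = 256;

            printf("blob: %u bytes, version 0x%02x\n",
                   (unsigned int)hdr.len, hdr.version);
            return 0;
    }
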
+static int _ep11_wrapkey(u16 card, u16 domain,
+                        const u8 *key, size_t keysize,
+                        u32 mech, const u8 *iv,
+                        u8 *databuf, size_t *datasize)
 {
        struct wk_req_pl {
                struct pl_head head;
@@ -1097,20 +1289,10 @@ static int ep11_wrapkey(u16 card, u16 domain,
        struct ep11_cprb *req = NULL, *rep = NULL;
        struct ep11_target_dev target;
        struct ep11_urb *urb = NULL;
-       struct ep11keyblob *kb;
        size_t req_pl_size;
        int api, rc = -ENOMEM;
-       bool has_header = false;
        u8 *p;
 
-       /* maybe the session field holds a header with key info */
-       kb = (struct ep11keyblob *)key;
-       if (kb->head.type == TOKTYPE_NON_CCA &&
-           kb->head.version == TOKVER_EP11_AES) {
-               has_header = true;
-               keysize = min_t(size_t, kb->head.len, keysize);
-       }
-
        /* request cprb and payload */
        req_pl_size = sizeof(struct wk_req_pl) + (iv ? 16 : 0)
                + ASN1TAGLEN(keysize) + 4;
@@ -1120,7 +1302,8 @@ static int ep11_wrapkey(u16 card, u16 domain,
        if (!mech || mech == 0x80060001)
                req->flags |= 0x20; /* CPACF_WRAP needs special bit */
        req_pl = (struct wk_req_pl *)(((u8 *)req) + sizeof(*req));
-       api = (!mech || mech == 0x80060001) ? 4 : 1; /* CKM_IBM_CPACF_WRAP */
+       api = (!mech || mech == 0x80060001) ? /* CKM_IBM_CPACF_WRAP */
+               EP11_API_V4 : EP11_API_V1;
        prep_head(&req_pl->head, req_pl_size, api, 33); /* WrapKey */
        req_pl->var_tag = 0x04;
        req_pl->var_len = sizeof(u32);
@@ -1135,11 +1318,6 @@ static int ep11_wrapkey(u16 card, u16 domain,
        }
        /* key blob */
        p += asn1tag_write(p, 0x04, key, keysize);
-       /* maybe the key argument needs the head data cleaned out */
-       if (has_header) {
-               kb = (struct ep11keyblob *)(p - keysize);
-               memset(&kb->head, 0, sizeof(kb->head));
-       }
        /* empty kek tag */
        *p++ = 0x04;
        *p++ = 0;
@@ -1198,10 +1376,10 @@ out:
 }
 
 int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
-                    const u8 *clrkey, u8 *keybuf, size_t *keybufsize)
+                    const u8 *clrkey, u8 *keybuf, size_t *keybufsize,
+                    u32 keytype)
 {
        int rc;
-       struct ep11keyblob *kb;
        u8 encbuf[64], *kek = NULL;
        size_t clrkeylen, keklen, encbuflen = sizeof(encbuf);
 
@@ -1223,17 +1401,15 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        }
 
        /* Step 1: generate AES 256 bit random kek key */
-       rc = ep11_genaeskey(card, domain, 256,
-                           0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
-                           kek, &keklen);
+       rc = _ep11_genaeskey(card, domain, 256,
+                            0x00006c00, /* EN/DECRYPT, WRAP/UNWRAP */
+                            kek, &keklen);
        if (rc) {
                DEBUG_ERR(
                        "%s generate kek key failed, rc=%d\n",
                        __func__, rc);
                goto out;
        }
-       kb = (struct ep11keyblob *)kek;
-       memset(&kb->head, 0, sizeof(kb->head));
 
        /* Step 2: encrypt clear key value with the kek key */
        rc = ep11_cryptsingle(card, domain, 0, 0, def_iv, kek, keklen,
@@ -1248,7 +1424,7 @@ int ep11_clr2keyblob(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
        /* Step 3: import the encrypted key value as a new key */
        rc = ep11_unwrapkey(card, domain, kek, keklen,
                            encbuf, encbuflen, 0, def_iv,
-                           keybitsize, 0, keybuf, keybufsize);
+                           keybitsize, 0, keybuf, keybufsize, keytype);
        if (rc) {
                DEBUG_ERR(
                        "%s importing key value as new key failed,, rc=%d\n",
@@ -1262,11 +1438,12 @@ out:
 }
 EXPORT_SYMBOL(ep11_clr2keyblob);
 
-int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
+int ep11_kblob2protkey(u16 card, u16 dom,
+                      const u8 *keyblob, size_t keybloblen,
                       u8 *protkey, u32 *protkeylen, u32 *protkeytype)
 {
-       int rc = -EIO;
-       u8 *wkbuf = NULL;
+       struct ep11kblob_header *hdr;
+       struct ep11keyblob *key;
        size_t wkbuflen, keylen;
        struct wk_info {
                u16 version;
@@ -1277,31 +1454,17 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
                u8  res2[8];
                u8  pkey[];
        } __packed * wki;
-       const u8 *key;
-       struct ep11kblob_header *hdr;
+       u8 *wkbuf = NULL;
+       int rc = -EIO;
 
-       /* key with or without header ? */
-       hdr = (struct ep11kblob_header *)keyblob;
-       if (hdr->type == TOKTYPE_NON_CCA &&
-           (hdr->version == TOKVER_EP11_AES_WITH_HEADER ||
-            hdr->version == TOKVER_EP11_ECC_WITH_HEADER) &&
-           is_ep11_keyblob(keyblob + sizeof(struct ep11kblob_header))) {
-               /* EP11 AES or ECC key with header */
-               key = keyblob + sizeof(struct ep11kblob_header);
-               keylen = hdr->len - sizeof(struct ep11kblob_header);
-       } else if (hdr->type == TOKTYPE_NON_CCA &&
-                  hdr->version == TOKVER_EP11_AES &&
-                  is_ep11_keyblob(keyblob)) {
-               /* EP11 AES key (old style) */
-               key = keyblob;
-               keylen = hdr->len;
-       } else if (is_ep11_keyblob(keyblob)) {
-               /* raw EP11 key blob */
-               key = keyblob;
-               keylen = keybloblen;
-       } else {
+       if (ep11_kb_decode((u8 *)keyblob, keybloblen, &hdr, NULL, &key, &keylen))
                return -EINVAL;
+
+       if (hdr->version == TOKVER_EP11_AES) {
+               /* wipe overlayed header */
+               memset(hdr, 0, sizeof(*hdr));
        }
+       /* !!! hdr is no longer a valid header !!! */
 
        /* alloc temp working buffer */
        wkbuflen = (keylen + AES_BLOCK_SIZE) & (~(AES_BLOCK_SIZE - 1));
@@ -1310,8 +1473,8 @@ int ep11_kblob2protkey(u16 card, u16 dom, const u8 *keyblob, size_t keybloblen,
                return -ENOMEM;
 
        /* ep11 secure key -> protected key + info */
-       rc = ep11_wrapkey(card, dom, key, keylen,
-                         0, def_iv, wkbuf, &wkbuflen);
+       rc = _ep11_wrapkey(card, dom, (u8 *)key, keylen,
+                          0, def_iv, wkbuf, &wkbuflen);
        if (rc) {
                DEBUG_ERR(
                        "%s rewrapping ep11 key to pkey failed, rc=%d\n",
index a3eddf5..9d17fd5 100644 (file)
@@ -12,7 +12,9 @@
 #include <asm/zcrypt.h>
 #include <asm/pkey.h>
 
-#define EP11_API_V 4  /* highest known and supported EP11 API version */
+#define EP11_API_V1 1  /* min EP11 API, default if no higher api required */
+#define EP11_API_V4 4  /* supported EP11 API for the ep11misc cprbs */
+#define EP11_API_V6 6  /* min EP11 API for some cprbs in SE environment */
 #define EP11_STRUCT_MAGIC 0x1234
 #define EP11_BLOB_PKEY_EXTRACTABLE 0x00200000
 
@@ -29,14 +31,7 @@ struct ep11keyblob {
        union {
                u8 session[32];
                /* only used for PKEY_TYPE_EP11: */
-               struct {
-                       u8  type;      /* 0x00 (TOKTYPE_NON_CCA) */
-                       u8  res0;      /* unused */
-                       u16 len;       /* total length in bytes of this blob */
-                       u8  version;   /* 0x03 (TOKVER_EP11_AES) */
-                       u8  res1;      /* unused */
-                       u16 keybitlen; /* clear key bit len, 0 for unknown */
-               } head;
+               struct ep11kblob_header head;
        };
        u8  wkvp[16];  /* wrapping key verification pattern */
        u64 attr;      /* boolean key attributes */
@@ -56,6 +51,12 @@ static inline bool is_ep11_keyblob(const u8 *key)
 }
 
 /*
+ * For valid ep11 keyblobs, returns a reference to the wrapping key verification
+ * pattern. Otherwise NULL.
+ */
+const u8 *ep11_kb_wkvp(const u8 *kblob, size_t kbloblen);
+
+/*
  * Simple check if the key blob is a valid EP11 AES key blob with header.
  * If checkcpacfexport is enabled, the key is also checked for the
  * attributes needed to export this key for CPACF use.
@@ -114,13 +115,14 @@ int ep11_get_domain_info(u16 card, u16 domain, struct ep11_domain_info *info);
  * Generate (random) EP11 AES secure key.
  */
 int ep11_genaeskey(u16 card, u16 domain, u32 keybitsize, u32 keygenflags,
-                  u8 *keybuf, size_t *keybufsize);
+                  u8 *keybuf, size_t *keybufsize, u32 keybufver);
 
 /*
  * Generate EP11 AES secure key with given clear key value.
  */
 int ep11_clr2keyblob(u16 cardnr, u16 domain, u32 keybitsize, u32 keygenflags,
-                    const u8 *clrkey, u8 *keybuf, size_t *keybufsize);
+                    const u8 *clrkey, u8 *keybuf, size_t *keybufsize,
+                    u32 keytype);
 
 /*
  * Build a list of ep11 apqns meeting the following constraints:
index 51f8f7a..2e155de 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- *  Copyright IBM Corp. 2001, 2012
+ *  Copyright IBM Corp. 2001, 2023
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
 /* >= CEX3A: 4096 bits */
 #define CEX3A_MAX_MOD_SIZE 512
 
-/* CEX2A: max outputdatalength + type80_hdr */
-#define CEX2A_MAX_RESPONSE_SIZE 0x110
-
 /* >= CEX3A: 512 bit modulus, (max outputdatalength) + type80_hdr */
 #define CEX3A_MAX_RESPONSE_SIZE 0x210
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \
-                  "Copyright IBM Corp. 2001, 2012");
+                  "Copyright IBM Corp. 2001, 2023");
 MODULE_LICENSE("GPL");
 
 /*
@@ -366,20 +363,17 @@ static int convert_type80(struct zcrypt_queue *zq,
                ap_send_online_uevent(&zq->queue->ap_dev, zq->online);
                return -EAGAIN;
        }
-       if (zq->zcard->user_space_type == ZCRYPT_CEX2A)
-               BUG_ON(t80h->len > CEX2A_MAX_RESPONSE_SIZE);
-       else
-               BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE);
+       BUG_ON(t80h->len > CEX3A_MAX_RESPONSE_SIZE);
        data = reply->msg + t80h->len - outputdatalength;
        if (copy_to_user(outputdata, data, outputdatalength))
                return -EFAULT;
        return 0;
 }
 
-static int convert_response_cex2a(struct zcrypt_queue *zq,
-                                 struct ap_message *reply,
-                                 char __user *outputdata,
-                                 unsigned int outputdatalength)
+static int convert_response(struct zcrypt_queue *zq,
+                           struct ap_message *reply,
+                           char __user *outputdata,
+                           unsigned int outputdatalength)
 {
        /* Response type byte is the second byte in the response. */
        unsigned char rtype = ((unsigned char *)reply->msg)[1];
@@ -414,9 +408,9 @@ static int convert_response_cex2a(struct zcrypt_queue *zq,
  * @msg: pointer to the AP message
  * @reply: pointer to the AP reply message
  */
-static void zcrypt_cex2a_receive(struct ap_queue *aq,
-                                struct ap_message *msg,
-                                struct ap_message *reply)
+static void zcrypt_msgtype50_receive(struct ap_queue *aq,
+                                    struct ap_message *msg,
+                                    struct ap_message *reply)
 {
        static struct error_hdr error_reply = {
                .type = TYPE82_RSP_CODE,
@@ -456,19 +450,18 @@ static atomic_t zcrypt_step = ATOMIC_INIT(0);
  *     CEXxA device to the request distributor
  * @mex: pointer to the modexpo request buffer
  */
-static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
-                                struct ica_rsa_modexpo *mex,
-                                struct ap_message *ap_msg)
+static long zcrypt_msgtype50_modexpo(struct zcrypt_queue *zq,
+                                    struct ica_rsa_modexpo *mex,
+                                    struct ap_message *ap_msg)
 {
        struct completion work;
        int rc;
 
-       ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ?
-               MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE;
+       ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE;
        ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
        if (!ap_msg->msg)
                return -ENOMEM;
-       ap_msg->receive = zcrypt_cex2a_receive;
+       ap_msg->receive = zcrypt_msgtype50_receive;
        ap_msg->psmid = (((unsigned long)current->pid) << 32) +
                atomic_inc_return(&zcrypt_step);
        ap_msg->private = &work;
@@ -483,9 +476,9 @@ static long zcrypt_cex2a_modexpo(struct zcrypt_queue *zq,
        if (rc == 0) {
                rc = ap_msg->rc;
                if (rc == 0)
-                       rc = convert_response_cex2a(zq, ap_msg,
-                                                   mex->outputdata,
-                                                   mex->outputdatalength);
+                       rc = convert_response(zq, ap_msg,
+                                             mex->outputdata,
+                                             mex->outputdatalength);
        } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
@@ -507,19 +500,18 @@ out:
  *     CEXxA device to the request distributor
  * @crt: pointer to the modexpoc_crt request buffer
  */
-static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
-                                    struct ica_rsa_modexpo_crt *crt,
-                                    struct ap_message *ap_msg)
+static long zcrypt_msgtype50_modexpo_crt(struct zcrypt_queue *zq,
+                                        struct ica_rsa_modexpo_crt *crt,
+                                        struct ap_message *ap_msg)
 {
        struct completion work;
        int rc;
 
-       ap_msg->bufsize = (zq->zcard->user_space_type == ZCRYPT_CEX2A) ?
-               MSGTYPE50_CRB2_MAX_MSG_SIZE : MSGTYPE50_CRB3_MAX_MSG_SIZE;
+       ap_msg->bufsize = MSGTYPE50_CRB3_MAX_MSG_SIZE;
        ap_msg->msg = kmalloc(ap_msg->bufsize, GFP_KERNEL);
        if (!ap_msg->msg)
                return -ENOMEM;
-       ap_msg->receive = zcrypt_cex2a_receive;
+       ap_msg->receive = zcrypt_msgtype50_receive;
        ap_msg->psmid = (((unsigned long)current->pid) << 32) +
                atomic_inc_return(&zcrypt_step);
        ap_msg->private = &work;
@@ -534,9 +526,9 @@ static long zcrypt_cex2a_modexpo_crt(struct zcrypt_queue *zq,
        if (rc == 0) {
                rc = ap_msg->rc;
                if (rc == 0)
-                       rc = convert_response_cex2a(zq, ap_msg,
-                                                   crt->outputdata,
-                                                   crt->outputdatalength);
+                       rc = convert_response(zq, ap_msg,
+                                             crt->outputdata,
+                                             crt->outputdatalength);
        } else {
                /* Signal pending. */
                ap_cancel_message(zq->queue, ap_msg);
@@ -555,8 +547,8 @@ out:
  * The crypto operations for message type 50.
  */
 static struct zcrypt_ops zcrypt_msgtype50_ops = {
-       .rsa_modexpo = zcrypt_cex2a_modexpo,
-       .rsa_modexpo_crt = zcrypt_cex2a_modexpo_crt,
+       .rsa_modexpo = zcrypt_msgtype50_modexpo,
+       .rsa_modexpo_crt = zcrypt_msgtype50_modexpo_crt,
        .owner = THIS_MODULE,
        .name = MSGTYPE50_NAME,
        .variant = MSGTYPE50_VARIANT_DEFAULT,
index eb49f06..323e93b 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ */
 /*
- *  Copyright IBM Corp. 2001, 2012
+ *  Copyright IBM Corp. 2001, 2023
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
@@ -15,7 +15,6 @@
 #define MSGTYPE50_NAME                 "zcrypt_msgtype50"
 #define MSGTYPE50_VARIANT_DEFAULT      0
 
-#define MSGTYPE50_CRB2_MAX_MSG_SIZE 0x390 /* sizeof(struct type50_crb2_msg) */
 #define MSGTYPE50_CRB3_MAX_MSG_SIZE 0x710 /* sizeof(struct type50_crb3_msg) */
 
 #define MSGTYPE_ADJUSTMENT 0x08  /* type04 extension (not needed in type50) */
index 67fd2ec..3c53abb 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- *  Copyright IBM Corp. 2001, 2022
+ *  Copyright IBM Corp. 2001, 2023
  *  Author(s): Robert Burroughs
  *            Eric Rossman (edrossma@us.ibm.com)
  *
@@ -42,7 +42,7 @@ struct response_type {
 
 MODULE_AUTHOR("IBM Corporation");
 MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
-                  "Copyright IBM Corp. 2001, 2012");
+                  "Copyright IBM Corp. 2001, 2023");
 MODULE_LICENSE("GPL");
 
 struct function_and_rules_block {
@@ -1101,23 +1101,36 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
                                      struct ica_xcRB *xcrb,
                                      struct ap_message *ap_msg)
 {
-       int rc;
        struct response_type *rtype = ap_msg->private;
        struct {
                struct type6_hdr hdr;
                struct CPRBX cprbx;
                /* ... more data blocks ... */
        } __packed * msg = ap_msg->msg;
-
-       /*
-        * Set the queue's reply buffer length minus 128 byte padding
-        * as reply limit for the card firmware.
-        */
-       msg->hdr.fromcardlen1 = min_t(unsigned int, msg->hdr.fromcardlen1,
-                                     zq->reply.bufsize - 128);
-       if (msg->hdr.fromcardlen2)
-               msg->hdr.fromcardlen2 =
-                       zq->reply.bufsize - msg->hdr.fromcardlen1 - 128;
+       unsigned int max_payload_size;
+       int rc, delta;
+
+       /* calculate maximum payload for this card and msg type */
+       max_payload_size = zq->reply.bufsize - sizeof(struct type86_fmt2_msg);
+
+       /* limit each of the two from fields to the maximum payload size */
+       msg->hdr.fromcardlen1 = min(msg->hdr.fromcardlen1, max_payload_size);
+       msg->hdr.fromcardlen2 = min(msg->hdr.fromcardlen2, max_payload_size);
+
+       /* calculate delta if the sum of both exceeds max payload size */
+       delta = msg->hdr.fromcardlen1 + msg->hdr.fromcardlen2
+               - max_payload_size;
+       if (delta > 0) {
+               /*
+                * Sum exceeds maximum payload size, prune fromcardlen1
+                * (always trust fromcardlen2)
+                */
+               if (delta > msg->hdr.fromcardlen1) {
+                       rc = -EINVAL;
+                       goto out;
+               }
+               msg->hdr.fromcardlen1 -= delta;
+       }
 
        init_completion(&rtype->work);
        rc = ap_queue_message(zq->queue, ap_msg);
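
A standalone sketch of the new reply-length clamping with made-up numbers (the buffer size and type86 header size are hypothetical; only the min/delta logic mirrors the hunk above):

    #include <stdio.h>

    static unsigned int min_u(unsigned int a, unsigned int b)
    {
            return a < b ? a : b;
    }

    int main(void)
    {
            unsigned int max_payload = 4096 - 64;  /* bufsize - hdr, made up */
            unsigned int len1 = 5000, len2 = 1000; /* requested reply sizes */

            /* clamp each field to the maximum payload */
            len1 = min_u(len1, max_payload);       /* -> 4032 */
            len2 = min_u(len2, max_payload);       /* -> 1000 */

            /* if the sum still exceeds the payload, prune len1 only */
            int delta = (int)(len1 + len2) - (int)max_payload;
            if (delta > 0) {
                    if ((unsigned int)delta > len1)
                            return 1;              /* would be -EINVAL */
                    len1 -= delta;                 /* 4032 - 1000 = 3032 */
            }
            printf("fromcardlen1=%u fromcardlen2=%u\n", len1, len2);
            return 0;
    }
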
@@ -1335,14 +1348,6 @@ out:
 /*
  * The crypto operations for a CEXxC card.
  */
-static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
-       .owner = THIS_MODULE,
-       .name = MSGTYPE06_NAME,
-       .variant = MSGTYPE06_VARIANT_NORNG,
-       .rsa_modexpo = zcrypt_msgtype6_modexpo,
-       .rsa_modexpo_crt = zcrypt_msgtype6_modexpo_crt,
-       .send_cprb = zcrypt_msgtype6_send_cprb,
-};
 
 static struct zcrypt_ops zcrypt_msgtype6_ops = {
        .owner = THIS_MODULE,
@@ -1365,14 +1370,12 @@ static struct zcrypt_ops zcrypt_msgtype6_ep11_ops = {
 
 void __init zcrypt_msgtype6_init(void)
 {
-       zcrypt_msgtype_register(&zcrypt_msgtype6_norng_ops);
        zcrypt_msgtype_register(&zcrypt_msgtype6_ops);
        zcrypt_msgtype_register(&zcrypt_msgtype6_ep11_ops);
 }
 
 void __exit zcrypt_msgtype6_exit(void)
 {
-       zcrypt_msgtype_unregister(&zcrypt_msgtype6_norng_ops);
        zcrypt_msgtype_unregister(&zcrypt_msgtype6_ops);
        zcrypt_msgtype_unregister(&zcrypt_msgtype6_ep11_ops);
 }
index 9b5fccd..6df7f37 100644 (file)
@@ -36,7 +36,7 @@ static const struct smcd_ops ism_ops;
 static struct ism_client *clients[MAX_CLIENTS];        /* use an array rather than */
                                                /* a list for fast mapping  */
 static u8 max_client;
-static DEFINE_SPINLOCK(clients_lock);
+static DEFINE_MUTEX(clients_lock);
 struct ism_dev_list {
        struct list_head list;
        struct mutex mutex; /* protects ism device list */
@@ -47,14 +47,22 @@ static struct ism_dev_list ism_dev_list = {
        .mutex = __MUTEX_INITIALIZER(ism_dev_list.mutex),
 };
 
+static void ism_setup_forwarding(struct ism_client *client, struct ism_dev *ism)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&ism->lock, flags);
+       ism->subs[client->id] = client;
+       spin_unlock_irqrestore(&ism->lock, flags);
+}
+
 int ism_register_client(struct ism_client *client)
 {
        struct ism_dev *ism;
-       unsigned long flags;
        int i, rc = -ENOSPC;
 
        mutex_lock(&ism_dev_list.mutex);
-       spin_lock_irqsave(&clients_lock, flags);
+       mutex_lock(&clients_lock);
        for (i = 0; i < MAX_CLIENTS; ++i) {
                if (!clients[i]) {
                        clients[i] = client;
@@ -65,12 +73,14 @@ int ism_register_client(struct ism_client *client)
                        break;
                }
        }
-       spin_unlock_irqrestore(&clients_lock, flags);
+       mutex_unlock(&clients_lock);
+
        if (i < MAX_CLIENTS) {
                /* initialize with all devices that we got so far */
                list_for_each_entry(ism, &ism_dev_list.list, list) {
                        ism->priv[i] = NULL;
                        client->add(ism);
+                       ism_setup_forwarding(client, ism);
                }
        }
        mutex_unlock(&ism_dev_list.mutex);
@@ -86,25 +96,32 @@ int ism_unregister_client(struct ism_client *client)
        int rc = 0;
 
        mutex_lock(&ism_dev_list.mutex);
-       spin_lock_irqsave(&clients_lock, flags);
-       clients[client->id] = NULL;
-       if (client->id + 1 == max_client)
-               max_client--;
-       spin_unlock_irqrestore(&clients_lock, flags);
        list_for_each_entry(ism, &ism_dev_list.list, list) {
+               spin_lock_irqsave(&ism->lock, flags);
+               /* Stop forwarding IRQs and events */
+               ism->subs[client->id] = NULL;
                for (int i = 0; i < ISM_NR_DMBS; ++i) {
                        if (ism->sba_client_arr[i] == client->id) {
-                               pr_err("%s: attempt to unregister client '%s'"
-                                      "with registered dmb(s)\n", __func__,
-                                      client->name);
+                               WARN(1, "%s: attempt to unregister '%s' with registered dmb(s)\n",
+                                    __func__, client->name);
                                rc = -EBUSY;
-                               goto out;
+                               goto err_reg_dmb;
                        }
                }
+               spin_unlock_irqrestore(&ism->lock, flags);
        }
-out:
        mutex_unlock(&ism_dev_list.mutex);
 
+       mutex_lock(&clients_lock);
+       clients[client->id] = NULL;
+       if (client->id + 1 == max_client)
+               max_client--;
+       mutex_unlock(&clients_lock);
+       return rc;
+
+err_reg_dmb:
+       spin_unlock_irqrestore(&ism->lock, flags);
+       mutex_unlock(&ism_dev_list.mutex);
        return rc;
 }
 EXPORT_SYMBOL_GPL(ism_unregister_client);
@@ -328,6 +345,7 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
                     struct ism_client *client)
 {
        union ism_reg_dmb cmd;
+       unsigned long flags;
        int ret;
 
        ret = ism_alloc_dmb(ism, dmb);
@@ -351,7 +369,9 @@ int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
                goto out;
        }
        dmb->dmb_tok = cmd.response.dmb_tok;
+       spin_lock_irqsave(&ism->lock, flags);
        ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = client->id;
+       spin_unlock_irqrestore(&ism->lock, flags);
 out:
        return ret;
 }
@@ -360,6 +380,7 @@ EXPORT_SYMBOL_GPL(ism_register_dmb);
 int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
        union ism_unreg_dmb cmd;
+       unsigned long flags;
        int ret;
 
        memset(&cmd, 0, sizeof(cmd));
@@ -368,7 +389,9 @@ int ism_unregister_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 
        cmd.request.dmb_tok = dmb->dmb_tok;
 
+       spin_lock_irqsave(&ism->lock, flags);
        ism->sba_client_arr[dmb->sba_idx - ISM_DMB_BIT_OFFSET] = NO_CLIENT;
+       spin_unlock_irqrestore(&ism->lock, flags);
 
        ret = ism_cmd(ism, &cmd);
        if (ret && ret != ISM_ERROR)
@@ -491,6 +514,7 @@ static u16 ism_get_chid(struct ism_dev *ism)
 static void ism_handle_event(struct ism_dev *ism)
 {
        struct ism_event *entry;
+       struct ism_client *clt;
        int i;
 
        while ((ism->ieq_idx + 1) != READ_ONCE(ism->ieq->header.idx)) {
@@ -499,21 +523,21 @@ static void ism_handle_event(struct ism_dev *ism)
 
                entry = &ism->ieq->entry[ism->ieq_idx];
                debug_event(ism_debug_info, 2, entry, sizeof(*entry));
-               spin_lock(&clients_lock);
-               for (i = 0; i < max_client; ++i)
-                       if (clients[i])
-                               clients[i]->handle_event(ism, entry);
-               spin_unlock(&clients_lock);
+               for (i = 0; i < max_client; ++i) {
+                       clt = ism->subs[i];
+                       if (clt)
+                               clt->handle_event(ism, entry);
+               }
        }
 }
 
 static irqreturn_t ism_handle_irq(int irq, void *data)
 {
        struct ism_dev *ism = data;
-       struct ism_client *clt;
        unsigned long bit, end;
        unsigned long *bv;
        u16 dmbemask;
+       u8 client_id;
 
        bv = (void *) &ism->sba->dmb_bits[ISM_DMB_WORD_OFFSET];
        end = sizeof(ism->sba->dmb_bits) * BITS_PER_BYTE - ISM_DMB_BIT_OFFSET;
@@ -530,8 +554,10 @@ static irqreturn_t ism_handle_irq(int irq, void *data)
                dmbemask = ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET];
                ism->sba->dmbe_mask[bit + ISM_DMB_BIT_OFFSET] = 0;
                barrier();
-               clt = clients[ism->sba_client_arr[bit]];
-               clt->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask);
+               client_id = ism->sba_client_arr[bit];
+               if (unlikely(client_id == NO_CLIENT || !ism->subs[client_id]))
+                       continue;
+               ism->subs[client_id]->handle_irq(ism, bit + ISM_DMB_BIT_OFFSET, dmbemask);
        }
 
        if (ism->sba->e) {
@@ -548,20 +574,9 @@ static u64 ism_get_local_gid(struct ism_dev *ism)
        return ism->local_gid;
 }
 
-static void ism_dev_add_work_func(struct work_struct *work)
-{
-       struct ism_client *client = container_of(work, struct ism_client,
-                                                add_work);
-
-       client->add(client->tgt_ism);
-       atomic_dec(&client->tgt_ism->add_dev_cnt);
-       wake_up(&client->tgt_ism->waitq);
-}
-
 static int ism_dev_init(struct ism_dev *ism)
 {
        struct pci_dev *pdev = ism->pdev;
-       unsigned long flags;
        int i, ret;
 
        ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
@@ -594,25 +609,16 @@ static int ism_dev_init(struct ism_dev *ism)
                /* hardware is V2 capable */
                ism_create_system_eid();
 
-       init_waitqueue_head(&ism->waitq);
-       atomic_set(&ism->free_clients_cnt, 0);
-       atomic_set(&ism->add_dev_cnt, 0);
-
-       wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt));
-       spin_lock_irqsave(&clients_lock, flags);
-       for (i = 0; i < max_client; ++i)
+       mutex_lock(&ism_dev_list.mutex);
+       mutex_lock(&clients_lock);
+       for (i = 0; i < max_client; ++i) {
                if (clients[i]) {
-                       INIT_WORK(&clients[i]->add_work,
-                                 ism_dev_add_work_func);
-                       clients[i]->tgt_ism = ism;
-                       atomic_inc(&ism->add_dev_cnt);
-                       schedule_work(&clients[i]->add_work);
+                       clients[i]->add(ism);
+                       ism_setup_forwarding(clients[i], ism);
                }
-       spin_unlock_irqrestore(&clients_lock, flags);
-
-       wait_event(ism->waitq, !atomic_read(&ism->add_dev_cnt));
+       }
+       mutex_unlock(&clients_lock);
 
-       mutex_lock(&ism_dev_list.mutex);
        list_add(&ism->list, &ism_dev_list.list);
        mutex_unlock(&ism_dev_list.mutex);
 
@@ -687,36 +693,24 @@ err_dev:
        return ret;
 }
 
-static void ism_dev_remove_work_func(struct work_struct *work)
-{
-       struct ism_client *client = container_of(work, struct ism_client,
-                                                remove_work);
-
-       client->remove(client->tgt_ism);
-       atomic_dec(&client->tgt_ism->free_clients_cnt);
-       wake_up(&client->tgt_ism->waitq);
-}
-
-/* Callers must hold ism_dev_list.mutex */
 static void ism_dev_exit(struct ism_dev *ism)
 {
        struct pci_dev *pdev = ism->pdev;
        unsigned long flags;
        int i;
 
-       wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt));
-       spin_lock_irqsave(&clients_lock, flags);
+       spin_lock_irqsave(&ism->lock, flags);
        for (i = 0; i < max_client; ++i)
-               if (clients[i]) {
-                       INIT_WORK(&clients[i]->remove_work,
-                                 ism_dev_remove_work_func);
-                       clients[i]->tgt_ism = ism;
-                       atomic_inc(&ism->free_clients_cnt);
-                       schedule_work(&clients[i]->remove_work);
-               }
-       spin_unlock_irqrestore(&clients_lock, flags);
+               ism->subs[i] = NULL;
+       spin_unlock_irqrestore(&ism->lock, flags);
 
-       wait_event(ism->waitq, !atomic_read(&ism->free_clients_cnt));
+       mutex_lock(&ism_dev_list.mutex);
+       mutex_lock(&clients_lock);
+       for (i = 0; i < max_client; ++i) {
+               if (clients[i])
+                       clients[i]->remove(ism);
+       }
+       mutex_unlock(&clients_lock);
 
        if (SYSTEM_EID.serial_number[0] != '0' ||
            SYSTEM_EID.type[0] != '0')
@@ -727,15 +721,14 @@ static void ism_dev_exit(struct ism_dev *ism)
        kfree(ism->sba_client_arr);
        pci_free_irq_vectors(pdev);
        list_del_init(&ism->list);
+       mutex_unlock(&ism_dev_list.mutex);
 }
 
 static void ism_remove(struct pci_dev *pdev)
 {
        struct ism_dev *ism = dev_get_drvdata(&pdev->dev);
 
-       mutex_lock(&ism_dev_list.mutex);
        ism_dev_exit(ism);
-       mutex_unlock(&ism_dev_list.mutex);
 
        pci_release_mem_regions(pdev);
        pci_disable_device(pdev);
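
The hunks above replace the ISM interrupt handler's global clients[] lookup with a per-device forwarding array (ism->subs[]) plus an explicit guard for unregistered ids. A minimal standalone sketch of that dispatch pattern follows; the stub types and array sizes are illustrative, not the driver's real definitions:

#include <stdint.h>

#define NO_CLIENT       0xff
#define MAX_CLIENTS     8

struct client {
        void (*handle_irq)(unsigned int dmb, uint16_t dmbemask);
};

struct dev {
        uint8_t client_of_dmb[64];        /* DMB slot -> owning client id */
        struct client *subs[MAX_CLIENTS]; /* client id -> client, or NULL */
};

static void dispatch_irq(struct dev *d, unsigned int bit, uint16_t dmbemask)
{
        uint8_t id = d->client_of_dmb[bit];

        /* Skip slots whose client was never registered or has since
         * unregistered, instead of dereferencing a stale table entry. */
        if (id == NO_CLIENT || !d->subs[id])
                return;
        d->subs[id]->handle_irq(bit, dmbemask);
}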
index 1d19542..613eab7 100644 (file)
@@ -716,7 +716,6 @@ struct qeth_card_info {
        u16 chid;
        u8 ids_valid:1; /* cssid,iid,chid */
        u8 dev_addr_is_registered:1;
-       u8 open_when_online:1;
        u8 promisc_mode:1;
        u8 use_v1_blkt:1;
        u8 is_vm_nic:1;
index 1d5b207..cd78329 100644 (file)
@@ -5373,8 +5373,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
        qeth_clear_ipacmd_list(card);
 
        rtnl_lock();
-       card->info.open_when_online = card->dev->flags & IFF_UP;
-       dev_close(card->dev);
        netif_device_detach(card->dev);
        netif_carrier_off(card->dev);
        rtnl_unlock();
index 9f13ed1..75910c0 100644 (file)
@@ -2388,9 +2388,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
                qeth_enable_hw_features(dev);
                qeth_l2_enable_brport_features(card);
 
-               if (card->info.open_when_online) {
-                       card->info.open_when_online = 0;
-                       dev_open(dev, NULL);
+               if (netif_running(dev)) {
+                       local_bh_disable();
+                       napi_schedule(&card->napi);
+                       /* kick-start the NAPI softirq: */
+                       local_bh_enable();
+                       qeth_l2_set_rx_mode(dev);
                }
                rtnl_unlock();
        }
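
Rather than closing the interface in set_offline and reopening it on recovery, the device is now left administratively up and NAPI is simply re-armed once the hardware returns. A hedged kernel-style sketch of the kick-start idiom used in the hunk above (not a standalone program): napi_schedule() only raises NET_RX_SOFTIRQ, and wrapping it in a bh-disable/enable pair guarantees the pending softirq actually runs on local_bh_enable() when called from process context.

        if (netif_running(dev)) {
                local_bh_disable();
                napi_schedule(&card->napi); /* mark NAPI as scheduled */
                local_bh_enable();          /* run the pending softirq now */
        }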
index af4e60d..b92a32b 100644 (file)
@@ -2018,9 +2018,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok)
                netif_device_attach(dev);
                qeth_enable_hw_features(dev);
 
-               if (card->info.open_when_online) {
-                       card->info.open_when_online = 0;
-                       dev_open(dev, NULL);
+               if (netif_running(dev)) {
+                       local_bh_disable();
+                       napi_schedule(&card->napi);
+                       /* kick-start the NAPI softirq: */
+                       local_bh_enable();
                }
                rtnl_unlock();
        }
index f213075..4f0d0e5 100644 (file)
@@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data)
 
        /* re-init to undo drop from zfcp_fc_adisc() */
        port->d_id = ntoh24(adisc_resp->adisc_port_id);
-       /* port is good, unblock rport without going through erp */
-       zfcp_scsi_schedule_rport_register(port);
+       /* port is still good, nothing to do */
  out:
        atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
        put_device(&port->dev);
@@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work)
        int retval;
 
        set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
-       get_device(&port->dev);
-       port->rport_task = RPORT_DEL;
-       zfcp_scsi_rport_work(&port->rport_work);
 
        /* only issue one test command at one time per port */
        if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
index e1e4f9d..857be0f 100644 (file)
@@ -1598,7 +1598,7 @@ NCR_700_intr(int irq, void *dev_id)
                                printk("scsi%d (%d:%d) PHASE MISMATCH IN SEND MESSAGE %d remain, return %p[%04x], phase %s\n", host->host_no, pun, lun, count, (void *)temp, temp - hostdata->pScript, sbcl_to_string(NCR_700_readb(host, SBCL_REG)));
 #endif
                                resume_offset = hostdata->pScript + Ent_SendMessagePhaseMismatch;
-                       } else if(dsp >= to32bit(&slot->pSG[0].ins) &&
+                       } else if (slot && dsp >= to32bit(&slot->pSG[0].ins) &&
                                  dsp <= to32bit(&slot->pSG[NCR_700_SG_SEGMENTS].ins)) {
                                int data_transfer = NCR_700_readl(host, DBC_REG) & 0xffffff;
                                int SGcount = (dsp - to32bit(&slot->pSG[0].ins))/sizeof(struct NCR_700_SG_List);
index 7c6efde..73b6ac0 100644 (file)
@@ -2618,7 +2618,7 @@ struct aac_hba_info {
 struct aac_aifcmd {
        __le32 command;         /* Tell host what type of notify this is */
        __le32 seqnum;          /* To allow ordering of reports (if necessary) */
-       u8 data[1];             /* Undefined length (from kernel viewpoint) */
+       u8 data[];              /* Undefined length (from kernel viewpoint) */
 };
 
 /**
index d82de34..e51e92f 100644 (file)
@@ -27,7 +27,7 @@
 
 #define DRV_NAME               "fnic"
 #define DRV_DESCRIPTION                "Cisco FCoE HBA Driver"
-#define DRV_VERSION            "1.6.0.54"
+#define DRV_VERSION            "1.6.0.55"
 #define PFX                    DRV_NAME ": "
 #define DFX                     DRV_NAME "%d: "
 
index 26dbd34..be89ce9 100644 (file)
@@ -2139,7 +2139,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
                                     bool new_sc)
 
 {
-       int ret = SUCCESS;
+       int ret = 0;
        struct fnic_pending_aborts_iter_data iter_data = {
                .fnic = fnic,
                .lun_dev = lr_sc->device,
@@ -2159,9 +2159,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
 
        /* walk again to check, if IOs are still pending in fw */
        if (fnic_is_abts_pending(fnic, lr_sc))
-               ret = FAILED;
+               ret = 1;
 
 clean_pending_aborts_end:
+       FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+                       "%s: exit status: %d\n", __func__, ret);
        return ret;
 }
 
index f3c3a26..be0d7c5 100644 (file)
@@ -465,7 +465,7 @@ int fnic_trace_buf_init(void)
        fnic_max_trace_entries = (trace_max_pages * PAGE_SIZE)/
                                          FNIC_ENTRY_SIZE_BYTES;
 
-       fnic_trace_buf_p = (unsigned long)vzalloc(trace_max_pages * PAGE_SIZE);
+       fnic_trace_buf_p = (unsigned long)vcalloc(trace_max_pages, PAGE_SIZE);
        if (!fnic_trace_buf_p) {
                printk(KERN_ERR PFX "Failed to allocate memory "
                                  "for fnic_trace_buf_p\n");
index 499849b..fdd7f69 100644 (file)
@@ -6944,7 +6944,9 @@ lpfc_unregister_fcf_rescan(struct lpfc_hba *phba)
        if (rc)
                return;
        /* Reset HBA FCF states after successful unregister FCF */
+       spin_lock_irq(&phba->hbalock);
        phba->fcf.fcf_flag = 0;
+       spin_unlock_irq(&phba->hbalock);
        phba->fcf.current_rec.flag = 0;
 
        /*
index a62e091..d26941b 100644 (file)
@@ -109,8 +109,6 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba,
        }
 }
 
-#define LPFC_INVALID_REFTAG ((u32)-1)
-
 /**
  * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread
  * @phba: The Hba for which this call is being executed.
@@ -978,8 +976,6 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        sgpe = scsi_prot_sglist(sc);
        lba = scsi_prot_ref_tag(sc);
-       if (lba == LPFC_INVALID_REFTAG)
-               return 0;
 
        /* First check if we need to match the LBA */
        if (phba->lpfc_injerr_lba != LPFC_INJERR_LBA_OFF) {
@@ -1560,8 +1556,6 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        /* extract some info from the scsi command for pde*/
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -1723,8 +1717,6 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        /* extract some info from the scsi command */
        blksize = scsi_prot_interval(sc);
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -1953,8 +1945,6 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
 
        /* extract some info from the scsi command for pde*/
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -2154,8 +2144,6 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
        /* extract some info from the scsi command */
        blksize = scsi_prot_interval(sc);
        reftag = scsi_prot_ref_tag(sc);
-       if (reftag == LPFC_INVALID_REFTAG)
-               goto out;
 
 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
        rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -2746,8 +2734,6 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
 
                src = (struct scsi_dif_tuple *)sg_virt(sgpe);
                start_ref_tag = scsi_prot_ref_tag(cmd);
-               if (start_ref_tag == LPFC_INVALID_REFTAG)
-                       goto out;
                start_app_tag = src->app_tag;
                len = sgpe->length;
                while (src && protsegcnt) {
@@ -3493,11 +3479,11 @@ err:
                             scsi_cmnd->sc_data_direction);
 
        lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
-                       "9084 Cannot setup S/G List for HBA"
-                       "IO segs %d/%d SGL %d SCSI %d: %d %d\n",
+                       "9084 Cannot setup S/G List for HBA "
+                       "IO segs %d/%d SGL %d SCSI %d: %d %d %d\n",
                        lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt,
                        phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt,
-                       prot_group_type, num_sge);
+                       prot_group_type, num_sge, ret);
 
        lpfc_cmd->seg_cnt = 0;
        lpfc_cmd->prot_seg_cnt = 0;
index 2e886c1..4995e1e 100644 (file)
@@ -1181,7 +1181,8 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
                pm80xx_set_thermal_config(pm8001_ha);
        }
 
-       if (pm8001_init_sas_add(pm8001_ha))
+       rc = pm8001_init_sas_add(pm8001_ha);
+       if (rc)
                goto err_out_shost;
        /* phy setting support for motherboard controller */
        rc = pm8001_configure_phy_settings(pm8001_ha);
index 2a31ddc..7825765 100644 (file)
@@ -31,6 +31,7 @@ static void qedf_remove(struct pci_dev *pdev);
 static void qedf_shutdown(struct pci_dev *pdev);
 static void qedf_schedule_recovery_handler(void *dev);
 static void qedf_recovery_handler(struct work_struct *work);
+static int qedf_suspend(struct pci_dev *pdev, pm_message_t state);
 
 /*
  * Driver module parameters.
@@ -3271,6 +3272,7 @@ static struct pci_driver qedf_pci_driver = {
        .probe = qedf_probe,
        .remove = qedf_remove,
        .shutdown = qedf_shutdown,
+       .suspend = qedf_suspend,
 };
 
 static int __qedf_probe(struct pci_dev *pdev, int mode)
@@ -4000,6 +4002,22 @@ static void qedf_shutdown(struct pci_dev *pdev)
        __qedf_remove(pdev, QEDF_MODE_NORMAL);
 }
 
+static int qedf_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct qedf_ctx *qedf;
+
+       if (!pdev) {
+               QEDF_ERR(NULL, "pdev is NULL.\n");
+               return -ENODEV;
+       }
+
+       qedf = pci_get_drvdata(pdev);
+
+       QEDF_ERR(&qedf->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
+
+       return -EPERM;
+}
+
 /*
  * Recovery handler code
  */
index 450522b..cd0180b 100644 (file)
@@ -69,6 +69,7 @@ static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
 static void qedi_recovery_handler(struct work_struct *work);
 static void qedi_schedule_hw_err_handler(void *dev,
                                         enum qed_hw_err_type err_type);
+static int qedi_suspend(struct pci_dev *pdev, pm_message_t state);
 
 static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
 {
@@ -1976,8 +1977,9 @@ static int qedi_cpu_offline(unsigned int cpu)
        struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu);
        struct qedi_work *work, *tmp;
        struct task_struct *thread;
+       unsigned long flags;
 
-       spin_lock_bh(&p->p_work_lock);
+       spin_lock_irqsave(&p->p_work_lock, flags);
        thread = p->iothread;
        p->iothread = NULL;
 
@@ -1988,7 +1990,7 @@ static int qedi_cpu_offline(unsigned int cpu)
                        kfree(work);
        }
 
-       spin_unlock_bh(&p->p_work_lock);
+       spin_unlock_irqrestore(&p->p_work_lock, flags);
        if (thread)
                kthread_stop(thread);
        return 0;
@@ -2510,6 +2512,22 @@ static void qedi_shutdown(struct pci_dev *pdev)
        __qedi_remove(pdev, QEDI_MODE_SHUTDOWN);
 }
 
+static int qedi_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+       struct qedi_ctx *qedi;
+
+       if (!pdev) {
+               QEDI_ERR(NULL, "pdev is NULL.\n");
+               return -ENODEV;
+       }
+
+       qedi = pci_get_drvdata(pdev);
+
+       QEDI_ERR(&qedi->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
+
+       return -EPERM;
+}
+
 static int __qedi_probe(struct pci_dev *pdev, int mode)
 {
        struct qedi_ctx *qedi;
@@ -2868,6 +2886,7 @@ static struct pci_driver qedi_pci_driver = {
        .remove = qedi_remove,
        .shutdown = qedi_shutdown,
        .err_handler = &qedi_err_handler,
+       .suspend = qedi_suspend,
 };
 
 static int __init qedi_init(void)
index d44c4d3..4ae3830 100644 (file)
@@ -4462,7 +4462,6 @@ struct qla_hw_data {
 
        /* n2n */
        struct fc_els_flogi plogi_els_payld;
-#define LOGIN_TEMPLATE_SIZE (sizeof(struct fc_els_flogi) - 4)
 
        void            *swl;
 
index c3dd8dd..367fba2 100644 (file)
@@ -8434,7 +8434,7 @@ qla24xx_load_risc_flash(scsi_qla_host_t *vha, uint32_t *srisc_addr,
                ql_dbg(ql_dbg_init, vha, 0x0163,
                    "-> fwdt%u template allocate template %#x words...\n",
                    j, risc_size);
-               fwdt->template = vmalloc(risc_size * sizeof(*dcode));
+               fwdt->template = vmalloc_array(risc_size, sizeof(*dcode));
                if (!fwdt->template) {
                        ql_log(ql_log_warn, vha, 0x0164,
                            "-> fwdt%u failed allocate template.\n", j);
@@ -8689,7 +8689,7 @@ qla24xx_load_risc_blob(scsi_qla_host_t *vha, uint32_t *srisc_addr)
                ql_dbg(ql_dbg_init, vha, 0x0173,
                    "-> fwdt%u template allocate template %#x words...\n",
                    j, risc_size);
-               fwdt->template = vmalloc(risc_size * sizeof(*dcode));
+               fwdt->template = vmalloc_array(risc_size, sizeof(*dcode));
                if (!fwdt->template) {
                        ql_log(ql_log_warn, vha, 0x0174,
                            "-> fwdt%u failed allocate template.\n", j);
index a1675f0..730d860 100644 (file)
@@ -3073,7 +3073,8 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
        memset(ptr, 0, sizeof(struct els_plogi_payload));
        memset(resp_ptr, 0, sizeof(struct els_plogi_payload));
        memcpy(elsio->u.els_plogi.els_plogi_pyld->data,
-           &ha->plogi_els_payld.fl_csp, LOGIN_TEMPLATE_SIZE);
+              (void *)&ha->plogi_els_payld + offsetof(struct fc_els_flogi, fl_csp),
+              sizeof(ha->plogi_els_payld) - offsetof(struct fc_els_flogi, fl_csp));
 
        elsio->u.els_plogi.els_cmd = els_opcode;
        elsio->u.els_plogi.els_plogi_pyld->opcode = els_opcode;
@@ -3911,7 +3912,7 @@ qla2x00_start_sp(srb_t *sp)
 
        pkt = __qla2x00_alloc_iocbs(sp->qpair, sp);
        if (!pkt) {
-               rval = EAGAIN;
+               rval = -EAGAIN;
                ql_log(ql_log_warn, vha, 0x700c,
                    "qla2x00_alloc_iocbs failed.\n");
                goto done;
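
The dropped LOGIN_TEMPLATE_SIZE was a hand-maintained "sizeof minus 4" constant; the replacement derives both the source offset and the copy length from offsetof(), so the copy tracks the structure layout automatically. A standalone sketch of the idiom (the struct below is illustrative, not the real struct fc_els_flogi):

#include <stddef.h>
#include <string.h>

struct flogi {
        unsigned char fl_cmd[4];  /* ELS command word */
        unsigned char fl_csp[16]; /* common service parameters */
        unsigned char fl_wwpn[8]; /* port name */
};

/* Copy everything from fl_csp through the end of the struct. */
static void copy_login_payload(void *dst, const struct flogi *src)
{
        size_t off = offsetof(struct flogi, fl_csp);

        memcpy(dst, (const char *)src + off, sizeof(*src) - off);
}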
index 898a0bd..95a86e0 100644 (file)
@@ -209,53 +209,6 @@ raid_attr_ro_state(level);
 raid_attr_ro_fn(resync);
 raid_attr_ro_state_fn(state);
 
-static void raid_component_release(struct device *dev)
-{
-       struct raid_component *rc =
-               container_of(dev, struct raid_component, dev);
-       dev_printk(KERN_ERR, rc->dev.parent, "COMPONENT RELEASE\n");
-       put_device(rc->dev.parent);
-       kfree(rc);
-}
-
-int raid_component_add(struct raid_template *r,struct device *raid_dev,
-                      struct device *component_dev)
-{
-       struct device *cdev =
-               attribute_container_find_class_device(&r->raid_attrs.ac,
-                                                     raid_dev);
-       struct raid_component *rc;
-       struct raid_data *rd = dev_get_drvdata(cdev);
-       int err;
-
-       rc = kzalloc(sizeof(*rc), GFP_KERNEL);
-       if (!rc)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&rc->node);
-       device_initialize(&rc->dev);
-       rc->dev.release = raid_component_release;
-       rc->dev.parent = get_device(component_dev);
-       rc->num = rd->component_count++;
-
-       dev_set_name(&rc->dev, "component-%d", rc->num);
-       list_add_tail(&rc->node, &rd->component_list);
-       rc->dev.class = &raid_class.class;
-       err = device_add(&rc->dev);
-       if (err)
-               goto err_out;
-
-       return 0;
-
-err_out:
-       list_del(&rc->node);
-       rd->component_count--;
-       put_device(component_dev);
-       kfree(rc);
-       return err;
-}
-EXPORT_SYMBOL(raid_component_add);
-
 struct raid_template *
 raid_class_attach(struct raid_function_template *ft)
 {
index 8c58128..9c0af50 100644 (file)
@@ -841,11 +841,6 @@ static int sdeb_zbc_nr_conv = DEF_ZBC_NR_CONV_ZONES;
 static int submit_queues = DEF_SUBMIT_QUEUES;  /* > 1 for multi-queue (mq) */
 static int poll_queues; /* iouring iopoll interface.*/
 
-static DEFINE_RWLOCK(atomic_rw);
-static DEFINE_RWLOCK(atomic_rw2);
-
-static rwlock_t *ramdisk_lck_a[2];
-
 static char sdebug_proc_name[] = MY_NAME;
 static const char *my_name = MY_NAME;
 
@@ -6818,9 +6813,6 @@ static int __init scsi_debug_init(void)
        int k, ret, hosts_to_add;
        int idx = -1;
 
-       ramdisk_lck_a[0] = &atomic_rw;
-       ramdisk_lck_a[1] = &atomic_rw2;
-
        if (sdebug_ndelay >= 1000 * 1000 * 1000) {
                pr_warn("ndelay must be less than 1 second, ignored\n");
                sdebug_ndelay = 0;
index 4a6eb17..41f23cd 100644 (file)
@@ -406,7 +406,7 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
                               size_t length, loff_t *ppos)
 {
        int host, channel, id, lun;
-       char *buffer, *p;
+       char *buffer, *end, *p;
        int err;
 
        if (!buf || length > PAGE_SIZE)
@@ -421,10 +421,14 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
                goto out;
 
        err = -EINVAL;
-       if (length < PAGE_SIZE)
-               buffer[length] = '\0';
-       else if (buffer[PAGE_SIZE-1])
-               goto out;
+       if (length < PAGE_SIZE) {
+               end = buffer + length;
+               *end = '\0';
+       } else {
+               end = buffer + PAGE_SIZE - 1;
+               if (*end)
+                       goto out;
+       }
 
        /*
         * Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
@@ -433,10 +437,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
        if (!strncmp("scsi add-single-device", buffer, 22)) {
                p = buffer + 23;
 
-               host = simple_strtoul(p, &p, 0);
-               channel = simple_strtoul(p + 1, &p, 0);
-               id = simple_strtoul(p + 1, &p, 0);
-               lun = simple_strtoul(p + 1, &p, 0);
+               host    = (p     < end) ? simple_strtoul(p, &p, 0) : 0;
+               channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               id      = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               lun     = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
 
                err = scsi_add_single_device(host, channel, id, lun);
 
@@ -447,10 +451,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
        } else if (!strncmp("scsi remove-single-device", buffer, 25)) {
                p = buffer + 26;
 
-               host = simple_strtoul(p, &p, 0);
-               channel = simple_strtoul(p + 1, &p, 0);
-               id = simple_strtoul(p + 1, &p, 0);
-               lun = simple_strtoul(p + 1, &p, 0);
+               host    = (p     < end) ? simple_strtoul(p, &p, 0) : 0;
+               channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               id      = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+               lun     = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
 
                err = scsi_remove_single_device(host, channel, id, lun);
        }
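
Both parsers above now keep the cursor inside [buffer, end) and substitute 0 for any field that would begin at or past the terminator. A standalone sketch of the same bounded host/channel/id/lun parse, with strtoul() standing in for simple_strtoul():

#include <stdlib.h>

static void parse_hcil(char *p, char *end, int out[4])
{
        /* Each field after the first skips one separator character;
         * a field that would start beyond 'end' parses as 0. */
        out[0] = (p < end) ? (int)strtoul(p, &p, 0) : 0;
        for (int i = 1; i < 4; i++)
                out[i] = (p + 1 < end) ? (int)strtoul(p + 1, &p, 0) : 0;
}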
index 68b12af..3c668cf 100644 (file)
@@ -3876,7 +3876,7 @@ static int sd_suspend_runtime(struct device *dev)
 static int sd_resume(struct device *dev)
 {
        struct scsi_disk *sdkp = dev_get_drvdata(dev);
-       int ret;
+       int ret = 0;
 
        if (!sdkp)      /* E.g.: runtime resume at the start of sd_probe() */
                return 0;
@@ -3884,8 +3884,11 @@ static int sd_resume(struct device *dev)
        if (!sdkp->device->manage_start_stop)
                return 0;
 
-       sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
-       ret = sd_start_stop_device(sdkp, 1);
+       if (!sdkp->device->no_start_on_resume) {
+               sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+               ret = sd_start_stop_device(sdkp, 1);
+       }
+
        if (!ret)
                opal_unlock_from_suspend(sdkp->opal_dev);
        return ret;
index abbd089..a252155 100644 (file)
@@ -831,7 +831,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
        struct request_queue *q = disk->queue;
        u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
        unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
-       u32 max_append;
        int ret = 0;
        unsigned int flags;
 
@@ -876,6 +875,11 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
                goto unlock;
        }
 
+       blk_queue_chunk_sectors(q,
+                       logical_to_sectors(sdkp->device, zone_blocks));
+       blk_queue_max_zone_append_sectors(q,
+                       q->limits.max_segments << PAGE_SECTORS_SHIFT);
+
        ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
 
        memalloc_noio_restore(flags);
@@ -888,12 +892,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
                goto unlock;
        }
 
-       max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
-                          q->limits.max_segments << PAGE_SECTORS_SHIFT);
-       max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
-
-       blk_queue_max_zone_append_sectors(q, max_append);
-
        sd_zbc_print_zones(sdkp);
 
 unlock:
index 89fa046..0d8afff 100644 (file)
@@ -1497,9 +1497,10 @@ sg_add_device(struct device *cl_dev)
        int error;
        unsigned long iflags;
 
-       error = blk_get_queue(scsidp->request_queue);
-       if (error)
-               return error;
+       if (!blk_get_queue(scsidp->request_queue)) {
+               pr_warn("%s: get scsi_device queue failed\n", __func__);
+               return -ENODEV;
+       }
 
        error = -ENOMEM;
        cdev = cdev_alloc();
index 3e2e578..4db3ba6 100644 (file)
@@ -307,7 +307,7 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid)
                spin_lock_irqsave(snic->shost->host_lock, flags);
                list_del(&tgt->list);
                spin_unlock_irqrestore(snic->shost->host_lock, flags);
-               kfree(tgt);
+               put_device(&tgt->dev);
                tgt = NULL;
 
                return tgt;
index 659196a..047ffaf 100644 (file)
@@ -318,6 +318,7 @@ enum storvsc_request_type {
 #define SRB_STATUS_INVALID_REQUEST     0x06
 #define SRB_STATUS_DATA_OVERRUN                0x12
 #define SRB_STATUS_INVALID_LUN         0x20
+#define SRB_STATUS_INTERNAL_ERROR      0x30
 
 #define SRB_STATUS(status) \
        (status & ~(SRB_STATUS_AUTOSENSE_VALID | SRB_STATUS_QUEUE_FROZEN))
@@ -365,6 +366,7 @@ static void storvsc_on_channel_callback(void *context);
 #define STORVSC_FC_MAX_LUNS_PER_TARGET                 255
 #define STORVSC_FC_MAX_TARGETS                         128
 #define STORVSC_FC_MAX_CHANNELS                                8
+#define STORVSC_FC_MAX_XFER_SIZE                       ((u32)(512 * 1024))
 
 #define STORVSC_IDE_MAX_LUNS_PER_TARGET                        64
 #define STORVSC_IDE_MAX_TARGETS                                1
@@ -978,6 +980,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
        case SRB_STATUS_ERROR:
        case SRB_STATUS_ABORTED:
        case SRB_STATUS_INVALID_REQUEST:
+       case SRB_STATUS_INTERNAL_ERROR:
                if (vm_srb->srb_status & SRB_STATUS_AUTOSENSE_VALID) {
                        /* Check for capacity change */
                        if ((asc == 0x2a) && (ascq == 0x9)) {
@@ -1671,10 +1674,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
  */
 static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
 {
-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
-       if (scmnd->device->host->transportt == fc_transport_template)
-               return fc_eh_timed_out(scmnd);
-#endif
        return SCSI_EH_RESET_TIMER;
 }
 
@@ -2004,6 +2003,9 @@ static int storvsc_probe(struct hv_device *device,
         * protecting it from any weird value.
         */
        max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
+       if (is_fc)
+               max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE);
+
        /* max_hw_sectors_kb */
        host->max_sectors = max_xfer_bytes >> 9;
        /*
index 1ca1403..3f75912 100644 (file)
@@ -137,6 +137,7 @@ static int __init aspeed_socinfo_init(void)
 
        soc_dev = soc_device_register(attrs);
        if (IS_ERR(soc_dev)) {
+               kfree(attrs->machine);
                kfree(attrs->soc_id);
                kfree(attrs->serial_number);
                kfree(attrs);
index ef8b24f..59123e1 100644 (file)
@@ -524,7 +524,7 @@ static ssize_t aspeed_uart_routing_store(struct device *dev,
        struct aspeed_uart_routing_selector *sel = to_routing_selector(attr);
        int val;
 
-       val = match_string(sel->options, -1, buf);
+       val = __sysfs_match_string(sel->options, -1, buf);
        if (val < 0) {
                dev_err(dev, "invalid value \"%s\"\n", buf);
                return -EINVAL;
index b3c226e..58746e5 100644 (file)
@@ -524,7 +524,7 @@ int qe_upload_firmware(const struct qe_firmware *firmware)
         * saved microcode information and put in the new.
         */
        memset(&qe_firmware_info, 0, sizeof(qe_firmware_info));
-       strlcpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id));
+       strscpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id));
        qe_firmware_info.extended_modes = be64_to_cpu(firmware->extended_modes);
        memcpy(qe_firmware_info.vtraps, firmware->vtraps,
                sizeof(firmware->vtraps));
@@ -599,7 +599,7 @@ struct qe_firmware_info *qe_get_firmware_info(void)
        /* Copy the data into qe_firmware_info*/
        sprop = of_get_property(fw, "id", NULL);
        if (sprop)
-               strlcpy(qe_firmware_info.id, sprop,
+               strscpy(qe_firmware_info.id, sprop,
                        sizeof(qe_firmware_info.id));
 
        of_property_read_u64(fw, "extended-modes",
index 870aecc..1c1fcab 100644 (file)
@@ -164,7 +164,7 @@ static int imx8mp_hsio_blk_ctrl_probe(struct imx8mp_blk_ctrl *bc)
        clk_hsio_pll->hw.init = &init;
 
        hw = &clk_hsio_pll->hw;
-       ret = devm_clk_hw_register(bc->dev, hw);
+       ret = devm_clk_hw_register(bc->bus_power_dev, hw);
        if (ret)
                return ret;
 
index 08aeb7e..3a99f6d 100644 (file)
@@ -910,9 +910,9 @@ static int amd_sdw_manager_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        amd_manager->acp_mmio = devm_ioremap(dev, res->start, resource_size(res));
-       if (IS_ERR(amd_manager->mmio)) {
+       if (!amd_manager->acp_mmio) {
                dev_err(dev, "mmio not found\n");
-               return PTR_ERR(amd_manager->mmio);
+               return -ENOMEM;
        }
        amd_manager->instance = pdata->instance;
        amd_manager->mmio = amd_manager->acp_mmio +
index dba920e..cf78839 100644 (file)
@@ -922,8 +922,8 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
                        "initializing enumeration and init completion for Slave %d\n",
                        slave->dev_num);
 
-               init_completion(&slave->enumeration_complete);
-               init_completion(&slave->initialization_complete);
+               reinit_completion(&slave->enumeration_complete);
+               reinit_completion(&slave->initialization_complete);
 
        } else if ((status == SDW_SLAVE_ATTACHED) &&
                   (slave->status == SDW_SLAVE_UNATTACHED)) {
@@ -931,7 +931,7 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
                        "signaling enumeration completion for Slave %d\n",
                        slave->dev_num);
 
-               complete(&slave->enumeration_complete);
+               complete_all(&slave->enumeration_complete);
        }
        slave->status = status;
        mutex_unlock(&bus->bus_lock);
@@ -1951,7 +1951,7 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
                                "signaling initialization completion for Slave %d\n",
                                slave->dev_num);
 
-                       complete(&slave->initialization_complete);
+                       complete_all(&slave->initialization_complete);
 
                        /*
                         * If the manager became pm_runtime active, the peripherals will be
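
The switch from init_completion() to reinit_completion(), and from complete() to complete_all(), follows the completion lifecycle rules: initialize once when the object is created, reinit on reuse (which resets only ->done and leaves the embedded waitqueue intact), and use complete_all() when more than one thread may be sleeping on the event. A hedged kernel-style sketch; the wrapper functions are illustrative, the completion fields are the ones from the hunk above:

static void slave_unattached(struct sdw_slave *slave)
{
        /* Reuse: reset ->done only. Calling init_completion() here
         * could race with waiters still queued from the last cycle. */
        reinit_completion(&slave->enumeration_complete);
        reinit_completion(&slave->initialization_complete);
}

static void slave_enumerated(struct sdw_slave *slave)
{
        /* Several contexts may be waiting; wake them all. */
        complete_all(&slave->enumeration_complete);
}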
index 7970fdb..c029e4d 100644 (file)
@@ -540,7 +540,7 @@ static int qcom_swrm_get_alert_slave_dev_num(struct qcom_swrm_ctrl *ctrl)
                status = (val >> (dev_num * SWRM_MCP_SLV_STATUS_SZ));
 
                if ((status & SWRM_MCP_SLV_STATUS_MASK) == SDW_SLAVE_ALERT) {
-                       ctrl->status[dev_num] = status;
+                       ctrl->status[dev_num] = status & SWRM_MCP_SLV_STATUS_MASK;
                        return dev_num;
                }
        }
index 9aecb77..07b5b71 100644 (file)
@@ -126,7 +126,7 @@ enum bcm63xx_regs_spi {
        SPI_MSG_DATA_SIZE,
 };
 
-#define BCM63XX_SPI_MAX_PREPEND                15
+#define BCM63XX_SPI_MAX_PREPEND                7
 
 #define BCM63XX_SPI_MAX_CS             8
 #define BCM63XX_SPI_BUS_NUM            0
index de8fe3c..9b02139 100644 (file)
@@ -317,12 +317,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
        xspi->rx_bytes -= nrx;
 
        while (ntx || nrx) {
-               /* When xspi in busy condition, bytes may send failed,
-                * then spi control did't work thoroughly, add one byte delay
-                */
-               if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
-                       udelay(10);
-
                if (ntx) {
                        if (xspi->txbuf)
                                cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
@@ -392,6 +386,11 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
                if (xspi->tx_bytes) {
                        cdns_spi_process_fifo(xspi, trans_cnt, trans_cnt);
                } else {
+                       /* Fixed delay due to a controller limitation: the
+                        * RX_NEMPTY status can be reported incorrectly.
+                        * Xilinx AR:65885 contains more details.
+                        */
+                       udelay(10);
                        cdns_spi_process_fifo(xspi, 0, trans_cnt);
                        cdns_spi_write(xspi, CDNS_SPI_IDR,
                                       CDNS_SPI_IXR_DEFAULT);
@@ -439,12 +438,18 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
                cdns_spi_setup_transfer(spi, transfer);
        } else {
                /* Set TX empty threshold to half of FIFO depth
-                * only if TX bytes are more than half FIFO depth.
+                * only if TX bytes are more than FIFO depth.
                 */
                if (xspi->tx_bytes > xspi->tx_fifo_depth)
                        cdns_spi_write(xspi, CDNS_SPI_THLD, xspi->tx_fifo_depth >> 1);
        }
 
+       /* When the controller is busy, bytes may fail to send and the
+        * SPI controller can stop working reliably, so add a one-byte delay.
+        */
+       if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
+               udelay(10);
+
        cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0);
        spi_transfer_delay_exec(transfer);
 
index a8a683d..1954c39 100644 (file)
@@ -69,7 +69,7 @@
                                 WR_FIFO_OVERRUN)
 #define QSPI_ALL_IRQS          (QSPI_ERR_IRQS | RESP_FIFO_RDY | \
                                 WR_FIFO_EMPTY | WR_FIFO_FULL | \
-                                TRANSACTION_DONE)
+                                TRANSACTION_DONE | DMA_CHAIN_DONE)
 
 #define PIO_XFER_CTRL          0x0014
 #define REQUEST_COUNT_MSK      0xffff
@@ -308,9 +308,11 @@ static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr,
        dma_addr_t dma_cmd_desc;
 
        /* allocate for dma cmd descriptor */
-       virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_KERNEL | __GFP_ZERO, &dma_cmd_desc);
-       if (!virt_cmd_desc)
-               return -ENOMEM;
+       virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_ATOMIC | __GFP_ZERO, &dma_cmd_desc);
+       if (!virt_cmd_desc) {
+               dev_warn_once(ctrl->dev, "Couldn't find memory for descriptor\n");
+               return -EAGAIN;
+       }
 
        ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc;
        ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc;
@@ -355,10 +357,22 @@ static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl,
 
        for (i = 0; i < sgt->nents; i++) {
                dma_ptr_sg = sg_dma_address(sgt->sgl + i);
+               dma_len_sg = sg_dma_len(sgt->sgl + i);
                if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) {
                        dev_warn_once(ctrl->dev, "dma_address not aligned to %d\n", QSPI_ALIGN_REQ);
                        return -EAGAIN;
                }
+               /*
+                * When reading with DMA the controller writes to memory one
+                * word at a time. If the length isn't a multiple of 4 bytes
+                * then the controller can clobber data beyond the buffer.
+                * Fall back to PIO to be safe.
+                */
+               if (ctrl->xfer.dir == QSPI_READ && (dma_len_sg & 0x03)) {
+                       dev_warn_once(ctrl->dev, "fallback to PIO for read of size %#010x\n",
+                                     dma_len_sg);
+                       return -EAGAIN;
+               }
        }
 
        for (i = 0; i < sgt->nents; i++) {
@@ -441,8 +455,10 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
 
                ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
                if (ret != -EAGAIN) {
-                       if (!ret)
+                       if (!ret) {
+                               dma_wmb();
                                qcom_qspi_dma_xfer(ctrl);
+                       }
                        goto exit;
                }
                dev_warn_once(ctrl->dev, "DMA failure, falling back to PIO\n");
@@ -603,6 +619,9 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
        int_status = readl(ctrl->base + MSTR_INT_STATUS);
        writel(int_status, ctrl->base + MSTR_INT_STATUS);
 
+       /* Ignore disabled interrupts */
+       int_status &= readl(ctrl->base + MSTR_INT_EN);
+
        /* PIO mode handling */
        if (ctrl->xfer.dir == QSPI_WRITE) {
                if (int_status & WR_FIFO_EMPTY)
@@ -647,6 +666,30 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
        return ret;
 }
 
+static int qcom_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+       /*
+        * If qcom_qspi_can_dma() is going to return false we don't need to
+        * adjust anything.
+        */
+       if (op->data.nbytes <= QSPI_MAX_BYTES_FIFO)
+               return 0;
+
+       /*
+        * When reading, the transfer needs to be a multiple of 4 bytes so
+        * shrink the transfer if that's not true. The caller will then do a
+        * second transfer to finish things up.
+        */
+       if (op->data.dir == SPI_MEM_DATA_IN && (op->data.nbytes & 0x3))
+               op->data.nbytes &= ~0x3;
+
+       return 0;
+}
+
+static const struct spi_controller_mem_ops qcom_qspi_mem_ops = {
+       .adjust_op_size = qcom_qspi_adjust_op_size,
+};
+
 static int qcom_qspi_probe(struct platform_device *pdev)
 {
        int ret;
@@ -731,6 +774,7 @@ static int qcom_qspi_probe(struct platform_device *pdev)
        if (of_property_read_bool(pdev->dev.of_node, "iommus"))
                master->can_dma = qcom_qspi_can_dma;
        master->auto_runtime_pm = true;
+       master->mem_ops = &qcom_qspi_mem_ops;
 
        ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
        if (ret)
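
The new adjust_op_size() hook lets the controller shrink an operation it cannot complete in one go, and the spi-mem core expects callers to loop over the remainder. A hedged kernel-style sketch of that contract (error handling omitted; the template op and field usage are illustrative):

static void read_all(struct spi_mem *mem, const struct spi_mem_op *tmpl,
                     void *buf, size_t len)
{
        size_t done = 0;

        while (done < len) {
                struct spi_mem_op op = *tmpl; /* opcode/addr set by caller */

                op.addr.val += done;
                op.data.buf.in = (char *)buf + done;
                op.data.nbytes = len - done;
                spi_mem_adjust_op_size(mem, &op); /* may round reads to 4n */
                spi_mem_exec_op(mem, &op);        /* error handling omitted */
                done += op.data.nbytes;           /* advance by accepted size */
        }
}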
index fd55697..b6c2659 100644 (file)
@@ -684,6 +684,8 @@ static int s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd)
 
        if ((sdd->cur_mode & SPI_LOOP) && sdd->port_conf->has_loopback)
                val |= S3C64XX_SPI_MODE_SELF_LOOPBACK;
+       else
+               val &= ~S3C64XX_SPI_MODE_SELF_LOOPBACK;
 
        writel(val, regs + S3C64XX_SPI_MODE_CFG);
 
index 6d10fa4..7ddf9db 100644 (file)
@@ -1001,9 +1001,9 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
        if (spi->cfg->set_number_of_data) {
                int ret;
 
-               ret = spi_split_transfers_maxsize(ctrl, msg,
-                                                 STM32H7_SPI_TSIZE_MAX,
-                                                 GFP_KERNEL | GFP_DMA);
+               ret = spi_split_transfers_maxwords(ctrl, msg,
+                                                  STM32H7_SPI_TSIZE_MAX,
+                                                  GFP_KERNEL | GFP_DMA);
                if (ret)
                        return ret;
        }
index 9ccd082..47e72b8 100644 (file)
@@ -145,7 +145,7 @@ static struct fbtft_display display = {
        },
 };
 
-FBTFT_REGISTER_DRIVER(DRVNAME, "ilitek,ili9341", &display);
+FBTFT_REGISTER_SPI_DRIVER(DRVNAME, "ilitek", "ili9341", &display);
 
 MODULE_ALIAS("spi:" DRVNAME);
 MODULE_ALIAS("platform:" DRVNAME);
index e03c87f..0fb97a7 100644 (file)
@@ -1583,8 +1583,10 @@ static int ks_wlan_set_encode_ext(struct net_device *dev,
                        commit |= SME_WEP_FLAG;
                }
                if (enc->key_len) {
-                       memcpy(&key->key_val[0], &enc->key[0], enc->key_len);
-                       key->key_len = enc->key_len;
+                       int key_len = clamp_val(enc->key_len, 0, IW_ENCODING_TOKEN_MAX);
+
+                       memcpy(&key->key_val[0], &enc->key[0], key_len);
+                       key->key_len = key_len;
                        commit |= (SME_WEP_VAL1 << index);
                }
                break;
index c9bff98..e9b168b 100644 (file)
@@ -13,6 +13,7 @@ config VIDEO_ATOMISP
        tristate "Intel Atom Image Signal Processor Driver"
        depends on VIDEO_DEV && INTEL_ATOMISP
        depends on PMIC_OPREGION
+       select V4L2_FWNODE
        select IOSF_MBI
        select VIDEOBUF2_VMALLOC
        select VIDEO_V4L2_SUBDEV_API
index 090345b..6353dbe 100644 (file)
@@ -21,6 +21,7 @@
 #include "osdep_intf.h"
 #include "usb_ops.h"
 
+#include <linux/usb.h>
 #include <linux/ieee80211.h>
 
 static const u8 P802_1H_OUI[P80211_OUI_LEN] = {0x00, 0x00, 0xf8};
@@ -55,6 +56,7 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
        sint i;
        struct xmit_buf *pxmitbuf;
        struct xmit_frame *pxframe;
+       int j;
 
        memset((unsigned char *)pxmitpriv, 0, sizeof(struct xmit_priv));
        spin_lock_init(&pxmitpriv->lock);
@@ -117,11 +119,8 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
        _init_queue(&pxmitpriv->pending_xmitbuf_queue);
        pxmitpriv->pallocated_xmitbuf =
                kmalloc(NR_XMITBUFF * sizeof(struct xmit_buf) + 4, GFP_ATOMIC);
-       if (!pxmitpriv->pallocated_xmitbuf) {
-               kfree(pxmitpriv->pallocated_frame_buf);
-               pxmitpriv->pallocated_frame_buf = NULL;
-               return -ENOMEM;
-       }
+       if (!pxmitpriv->pallocated_xmitbuf)
+               goto clean_up_frame_buf;
        pxmitpriv->pxmitbuf = pxmitpriv->pallocated_xmitbuf + 4 -
                              ((addr_t)(pxmitpriv->pallocated_xmitbuf) & 3);
        pxmitbuf = (struct xmit_buf *)pxmitpriv->pxmitbuf;
@@ -129,13 +128,17 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
                INIT_LIST_HEAD(&pxmitbuf->list);
                pxmitbuf->pallocated_buf =
                        kmalloc(MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ, GFP_ATOMIC);
-               if (!pxmitbuf->pallocated_buf)
-                       return -ENOMEM;
+               if (!pxmitbuf->pallocated_buf) {
+                       j = 0;
+                       goto clean_up_alloc_buf;
+               }
                pxmitbuf->pbuf = pxmitbuf->pallocated_buf + XMITBUF_ALIGN_SZ -
                                 ((addr_t) (pxmitbuf->pallocated_buf) &
                                 (XMITBUF_ALIGN_SZ - 1));
-               if (r8712_xmit_resource_alloc(padapter, pxmitbuf))
-                       return -ENOMEM;
+               if (r8712_xmit_resource_alloc(padapter, pxmitbuf)) {
+                       j = 1;
+                       goto clean_up_alloc_buf;
+               }
                list_add_tail(&pxmitbuf->list,
                                 &(pxmitpriv->free_xmitbuf_queue.queue));
                pxmitbuf++;
@@ -146,6 +149,28 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
        init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
        tasklet_setup(&pxmitpriv->xmit_tasklet, r8712_xmit_bh);
        return 0;
+
+clean_up_alloc_buf:
+       if (j) {
+               /* the failure happened in r8712_xmit_resource_alloc();
+                * free the extra pxmitbuf->pallocated_buf
+                */
+               kfree(pxmitbuf->pallocated_buf);
+       }
+       for (j = 0; j < i; j++) {
+               int k;
+
+               pxmitbuf--;                     /* reset pointer */
+               kfree(pxmitbuf->pallocated_buf);
+               for (k = 0; k < 8; k++)         /* delete xmit urb's */
+                       usb_free_urb(pxmitbuf->pxmit_urb[k]);
+       }
+       kfree(pxmitpriv->pallocated_xmitbuf);
+       pxmitpriv->pallocated_xmitbuf = NULL;
+clean_up_frame_buf:
+       kfree(pxmitpriv->pallocated_frame_buf);
+       pxmitpriv->pallocated_frame_buf = NULL;
+       return -ENOMEM;
 }
 
 void _free_xmit_priv(struct xmit_priv *pxmitpriv)
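
The error path added above is the usual kernel unwind ladder: one label per acquisition stage, releasing in reverse order, plus the extra twist that a failure inside the loop must also free the partially initialized element. A standalone sketch of the basic ladder:

#include <stdlib.h>

static int init_three(void **a, void **b, void **c)
{
        *a = malloc(16);
        if (!*a)
                goto err;
        *b = malloc(16);
        if (!*b)
                goto err_free_a;
        *c = malloc(16);
        if (!*c)
                goto err_free_b;
        return 0;

err_free_b:                     /* unwind in reverse order */
        free(*b);
err_free_a:
        free(*a);
err:
        return -1;
}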
index 132afbf..ceb6b59 100644 (file)
@@ -112,6 +112,12 @@ int r8712_xmit_resource_alloc(struct _adapter *padapter,
        for (i = 0; i < 8; i++) {
                pxmitbuf->pxmit_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
                if (!pxmitbuf->pxmit_urb[i]) {
+                       int k;
+
+                       for (k = i - 1; k >= 0; k--) {
+                               /* free the urbs allocated before the failure partway through the loop */
+                               usb_free_urb(pxmitbuf->pxmit_urb[k]);
+                       }
                        netdev_err(padapter->pnetdev, "pxmitbuf->pxmit_urb[i] == NULL\n");
                        return -ENOMEM;
                }
index 013f163..2f00fc3 100644 (file)
@@ -57,10 +57,10 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu)
 
 static int rapl_mmio_read_raw(int cpu, struct reg_action *ra)
 {
-       if (!ra->reg)
+       if (!ra->reg.mmio)
                return -EINVAL;
 
-       ra->value = readq((void __iomem *)ra->reg);
+       ra->value = readq(ra->reg.mmio);
        ra->value &= ra->mask;
        return 0;
 }
@@ -69,13 +69,13 @@ static int rapl_mmio_write_raw(int cpu, struct reg_action *ra)
 {
        u64 val;
 
-       if (!ra->reg)
+       if (!ra->reg.mmio)
                return -EINVAL;
 
-       val = readq((void __iomem *)ra->reg);
+       val = readq(ra->reg.mmio);
        val &= ~ra->mask;
        val |= ra->value;
-       writeq(val, (void __iomem *)ra->reg);
+       writeq(val, ra->reg.mmio);
        return 0;
 }
 
@@ -92,13 +92,13 @@ int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc
        for (domain = RAPL_DOMAIN_PACKAGE; domain < RAPL_DOMAIN_MAX; domain++) {
                for (reg = RAPL_DOMAIN_REG_LIMIT; reg < RAPL_DOMAIN_REG_MAX; reg++)
                        if (rapl_regs->regs[domain][reg])
-                               rapl_mmio_priv.regs[domain][reg] =
-                                               (u64)proc_priv->mmio_base +
+                               rapl_mmio_priv.regs[domain][reg].mmio =
+                                               proc_priv->mmio_base +
                                                rapl_regs->regs[domain][reg];
                rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain];
        }
        rapl_mmio_priv.type = RAPL_IF_MMIO;
-       rapl_mmio_priv.reg_unit = (u64)proc_priv->mmio_base + rapl_regs->reg_unit;
+       rapl_mmio_priv.reg_unit.mmio = proc_priv->mmio_base + rapl_regs->reg_unit;
 
        rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
        rapl_mmio_priv.write_raw = rapl_mmio_write_raw;
index e95f799..6c39214 100644 (file)
@@ -60,7 +60,7 @@ static const struct x86_cpu_id tcc_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
-       X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
        X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),
index 842f678..a597005 100644 (file)
@@ -348,7 +348,10 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip_id)
        struct thermal_trip trip;

        /* Ignore disabled trip points */
        if (test_bit(trip_id, &tz->trips_disabled))
                return;

        __thermal_zone_get_trip(tz, trip_id, &trip);
+
+       if (trip.temperature == THERMAL_TEMP_INVALID)
+               return;
@@ -496,6 +497,25 @@ void thermal_zone_device_update(struct thermal_zone_device *tz,
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_update);
 
+/**
+ * thermal_zone_device_exec - Run a callback under the zone lock.
+ * @tz: Thermal zone.
+ * @cb: Callback to run.
+ * @data: Data to pass to the callback.
+ */
+void thermal_zone_device_exec(struct thermal_zone_device *tz,
+                             void (*cb)(struct thermal_zone_device *,
+                                        unsigned long),
+                             unsigned long data)
+{
+       mutex_lock(&tz->lock);
+
+       cb(tz, data);
+
+       mutex_unlock(&tz->lock);
+}
+EXPORT_SYMBOL_GPL(thermal_zone_device_exec);
+
 static void thermal_zone_device_check(struct work_struct *work)
 {
        struct thermal_zone_device *tz = container_of(work, struct
@@ -1203,7 +1223,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_get_crit_temp);
 struct thermal_zone_device *
 thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *trips, int num_trips, int mask,
                                        void *devdata, struct thermal_zone_device_ops *ops,
-                                       struct thermal_zone_params *tzp, int passive_delay,
+                                       const struct thermal_zone_params *tzp, int passive_delay,
                                        int polling_delay)
 {
        struct thermal_zone_device *tz;
@@ -1371,7 +1391,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips);
 
 struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask,
                                                         void *devdata, struct thermal_zone_device_ops *ops,
-                                                        struct thermal_zone_params *tzp, int passive_delay,
+                                                        const struct thermal_zone_params *tzp, int passive_delay,
                                                         int polling_delay)
 {
        return thermal_zone_device_register_with_trips(type, NULL, ntrips, mask,
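
A hedged usage sketch for the new thermal_zone_device_exec() helper: it merely brackets the callback with the zone lock, so the callback may call the __-prefixed internals that assert tz->lock is held. The callback body below is illustrative:

static void zone_recheck(struct thermal_zone_device *tz, unsigned long data)
{
        /* tz->lock is held by thermal_zone_device_exec() here */
        __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
}

static void recheck_zone(struct thermal_zone_device *tz)
{
        thermal_zone_device_exec(tz, zone_recheck, 0);
}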
index 17c1bbe..04513f9 100644 (file)
@@ -54,10 +54,6 @@ int for_each_thermal_cooling_device(int (*cb)(struct thermal_cooling_device *,
 int for_each_thermal_governor(int (*cb)(struct thermal_governor *, void *),
                              void *thermal_governor);
 
-int __for_each_thermal_trip(struct thermal_zone_device *,
-                           int (*cb)(struct thermal_trip *, void *),
-                           void *);
-
 struct thermal_zone_device *thermal_zone_get_by_id(int id);
 
 struct thermal_attr {
index 6fb14e5..bc07ae1 100644 (file)
@@ -238,17 +238,13 @@ static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdel
        return 0;
 }
 
-static struct thermal_zone_params *thermal_of_parameters_init(struct device_node *np)
+static void thermal_of_parameters_init(struct device_node *np,
+                                      struct thermal_zone_params *tzp)
 {
-       struct thermal_zone_params *tzp;
        int coef[2];
        int ncoef = ARRAY_SIZE(coef);
        int prop, ret;
 
-       tzp = kzalloc(sizeof(*tzp), GFP_KERNEL);
-       if (!tzp)
-               return ERR_PTR(-ENOMEM);
-
        tzp->no_hwmon = true;
 
        if (!of_property_read_u32(np, "sustainable-power", &prop))
@@ -267,8 +263,6 @@ static struct thermal_zone_params *thermal_of_parameters_init(struct device_node
 
        tzp->slope = coef[0];
        tzp->offset = coef[1];
-
-       return tzp;
 }
 
 static struct device_node *thermal_of_zone_get_by_name(struct thermal_zone_device *tz)
@@ -442,13 +436,11 @@ static int thermal_of_unbind(struct thermal_zone_device *tz,
 static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
 {
        struct thermal_trip *trips = tz->trips;
-       struct thermal_zone_params *tzp = tz->tzp;
        struct thermal_zone_device_ops *ops = tz->ops;
 
        thermal_zone_device_disable(tz);
        thermal_zone_device_unregister(tz);
        kfree(trips);
-       kfree(tzp);
        kfree(ops);
 }
 
@@ -477,7 +469,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 {
        struct thermal_zone_device *tz;
        struct thermal_trip *trips;
-       struct thermal_zone_params *tzp;
+       struct thermal_zone_params tzp = {};
        struct thermal_zone_device_ops *of_ops;
        struct device_node *np;
        int delay, pdelay;
@@ -509,12 +501,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
                goto out_kfree_trips;
        }
 
-       tzp = thermal_of_parameters_init(np);
-       if (IS_ERR(tzp)) {
-               ret = PTR_ERR(tzp);
-               pr_err("Failed to initialize parameter from %pOFn: %d\n", np, ret);
-               goto out_kfree_trips;
-       }
+       thermal_of_parameters_init(np, &tzp);
 
        of_ops->bind = thermal_of_bind;
        of_ops->unbind = thermal_of_unbind;
@@ -522,12 +509,12 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
        mask = GENMASK_ULL((ntrips) - 1, 0);
 
        tz = thermal_zone_device_register_with_trips(np->name, trips, ntrips,
-                                                    mask, data, of_ops, tzp,
+                                                    mask, data, of_ops, &tzp,
                                                     pdelay, delay);
        if (IS_ERR(tz)) {
                ret = PTR_ERR(tz);
                pr_err("Failed to register thermal zone %pOFn: %d\n", np, ret);
-               goto out_kfree_tzp;
+               goto out_kfree_trips;
        }
 
        ret = thermal_zone_device_enable(tz);
@@ -540,8 +527,6 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 
        return tz;
 
-out_kfree_tzp:
-       kfree(tzp);
 out_kfree_trips:
        kfree(trips);
 out_kfree_of_ops:
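
The thermal_of hunks above replace a heap-allocated thermal_zone_params with a caller-provided, stack-allocated one; since thermal_zone_device_register_with_trips() copies the parameters, this removes an allocation-failure path plus the matching kfree() in both the unregister and error-unwind paths. A minimal runnable userspace sketch of the caller-owned-storage pattern (names hypothetical, not the kernel API):

#include <stdio.h>

struct params {
	int slope;
	int offset;
	int no_hwmon;
};

/* Fill defaults into storage the caller owns; cannot fail. */
static void params_init(struct params *p)
{
	p->no_hwmon = 1;
	p->slope = 1;
	p->offset = 0;
}

int main(void)
{
	struct params p = {};	/* stack storage, no kzalloc()/kfree() pair */

	params_init(&p);
	printf("slope=%d offset=%d\n", p.slope, p.offset);
	return 0;
}
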
index 907f3a4..53115cf 100644 (file)
@@ -9,28 +9,26 @@
  */
 #include "thermal_core.h"
 
-int __for_each_thermal_trip(struct thermal_zone_device *tz,
-                           int (*cb)(struct thermal_trip *, void *),
-                           void *data)
+int for_each_thermal_trip(struct thermal_zone_device *tz,
+                         int (*cb)(struct thermal_trip *, void *),
+                         void *data)
 {
        int i, ret;
-       struct thermal_trip trip;
 
        lockdep_assert_held(&tz->lock);
 
-       for (i = 0; i < tz->num_trips; i++) {
-
-               ret = __thermal_zone_get_trip(tz, i, &trip);
-               if (ret)
-                       return ret;
+       if (!tz->trips)
+               return -ENODATA;
 
-               ret = cb(&trip, data);
+       for (i = 0; i < tz->num_trips; i++) {
+               ret = cb(&tz->trips[i], data);
                if (ret)
                        return ret;
        }
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(for_each_thermal_trip);
 
 int thermal_zone_get_num_trips(struct thermal_zone_device *tz)
 {
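
for_each_thermal_trip() now walks tz->trips directly, stops at the first callback that returns nonzero, and is exported for callers outside the thermal core. A runnable userspace model of that callback contract (types and names simplified, not the kernel structures):

#include <stdio.h>

struct trip { int temperature; };

static int for_each_trip(struct trip *trips, int n,
			 int (*cb)(struct trip *, void *), void *data)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = cb(&trips[i], data);
		if (ret)
			return ret;	/* first nonzero result aborts the walk */
	}
	return 0;
}

static int count_hot(struct trip *t, void *data)
{
	if (t->temperature > 80000)
		(*(int *)data)++;
	return 0;			/* keep iterating */
}

int main(void)
{
	struct trip trips[] = { { 45000 }, { 85000 }, { 95000 } };
	int hot = 0;

	for_each_trip(trips, 3, count_hot, &hot);
	printf("%d hot trips\n", hot);	/* prints 2 */
	return 0;
}
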
index 62b26b7..3fb4553 100644 (file)
@@ -1964,6 +1964,8 @@ unlock:
 
        pm_runtime_mark_last_busy(&tb->dev);
        pm_runtime_put_autosuspend(&tb->dev);
+
+       kfree(ev);
 }
 
 static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port)
index 1269f41..0dfd1e0 100644 (file)
@@ -579,7 +579,9 @@ int tb_switch_tmu_disable(struct tb_switch *sw)
                 * uni-directional mode and we don't want to change its TMU
                 * mode.
                 */
-               tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]);
+               ret = tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]);
+               if (ret)
+                       return ret;
 
                tb_port_tmu_time_sync_disable(up);
                ret = tb_port_tmu_time_sync_disable(down);
index 341abae..069de55 100644 (file)
@@ -164,6 +164,9 @@ config LEGACY_TIOCSTI
          userspace depends on this functionality to continue operating
          normally.
 
+         Processes which run with CAP_SYS_ADMIN, such as BRLTTY, can
+         use TIOCSTI even when this is set to N.
+
          This functionality can be changed at runtime with the
          dev.tty.legacy_tiocsti sysctl. This configuration option sets
          the default value of the sysctl.
index b411a26..739f522 100644 (file)
@@ -3042,12 +3042,13 @@ static void gsm_error(struct gsm_mux *gsm)
 static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
 {
        int i;
-       struct gsm_dlci *dlci = gsm->dlci[0];
+       struct gsm_dlci *dlci;
        struct gsm_msg *txq, *ntxq;
 
        gsm->dead = true;
        mutex_lock(&gsm->mutex);
 
+       dlci = gsm->dlci[0];
        if (dlci) {
                if (disc && dlci->state != DLCI_CLOSED) {
                        gsm_dlci_begin_close(dlci);
@@ -3070,8 +3071,10 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
                gsm->has_devices = false;
        }
        for (i = NUM_DLCI - 1; i >= 0; i--)
-               if (gsm->dlci[i])
+               if (gsm->dlci[i]) {
                        gsm_dlci_release(gsm->dlci[i]);
+                       gsm->dlci[i] = NULL;
+               }
        mutex_unlock(&gsm->mutex);
        /* Now wipe the queues */
        tty_ldisc_flush(gsm->tty);
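
The n_gsm fix defers reading gsm->dlci[0] until gsm->mutex is held, so cleanup cannot act on a stale pointer, and it NULLs each DLCI slot after release so a repeated cleanup cannot double-free. A runnable pthread sketch of the load-under-lock idiom (structure hypothetical):

#include <pthread.h>
#include <stdlib.h>

struct channel { int id; };

struct mux {
	pthread_mutex_t lock;
	struct channel *chan;	/* shared; may be swapped concurrently */
};

static void cleanup(struct mux *m)
{
	struct channel *c;

	pthread_mutex_lock(&m->lock);
	c = m->chan;		/* read only after taking the lock */
	if (c) {
		free(c);
		m->chan = NULL;	/* prevent a second free on re-entry */
	}
	pthread_mutex_unlock(&m->lock);
}

int main(void)
{
	struct mux m = { PTHREAD_MUTEX_INITIALIZER,
			 malloc(sizeof(struct channel)) };

	cleanup(&m);
	cleanup(&m);	/* safe: the slot was cleared */
	return 0;
}
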
index 914e0e6..3449f87 100644 (file)
@@ -497,6 +497,7 @@ static struct uart_8250_port *serial8250_setup_port(int index)
 
        up = &serial8250_ports[index];
        up->port.line = index;
+       up->port.port_id = index;
 
        serial8250_init_port(up);
        if (!base_ops)
@@ -1040,6 +1041,7 @@ int serial8250_register_8250_port(const struct uart_8250_port *up)
                        uart_remove_one_port(&serial8250_reg, &uart->port);
 
                uart->port.ctrl_id      = up->port.ctrl_id;
+               uart->port.port_id      = up->port.port_id;
                uart->port.iobase       = up->port.iobase;
                uart->port.membase      = up->port.membase;
                uart->port.irq          = up->port.irq;
@@ -1202,6 +1204,7 @@ void serial8250_unregister_port(int line)
                uart->port.flags &= ~UPF_BOOT_AUTOCONF;
                uart->port.type = PORT_UNKNOWN;
                uart->port.dev = &serial8250_isa_devs->dev;
+               uart->port.port_id = line;
                uart->capabilities = 0;
                serial8250_init_port(uart);
                serial8250_apply_quirks(uart);
index 75f32f0..84843e2 100644 (file)
@@ -244,7 +244,7 @@ void dw8250_setup_port(struct uart_port *p)
        struct dw8250_port_data *pd = p->private_data;
        struct dw8250_data *data = to_dw8250_data(pd);
        struct uart_8250_port *up = up_to_u8250p(p);
-       u32 reg;
+       u32 reg, old_dlf;
 
        pd->hw_rs485_support = dw8250_detect_rs485_hw(p);
        if (pd->hw_rs485_support) {
@@ -270,9 +270,11 @@ void dw8250_setup_port(struct uart_port *p)
        dev_dbg(p->dev, "Designware UART version %c.%c%c\n",
                (reg >> 24) & 0xff, (reg >> 16) & 0xff, (reg >> 8) & 0xff);
 
+       /* Preserve value written by firmware or bootloader */
+       old_dlf = dw8250_readl_ext(p, DW_UART_DLF);
        dw8250_writel_ext(p, DW_UART_DLF, ~0U);
        reg = dw8250_readl_ext(p, DW_UART_DLF);
-       dw8250_writel_ext(p, DW_UART_DLF, 0);
+       dw8250_writel_ext(p, DW_UART_DLF, old_dlf);
 
        if (reg) {
                pd->dlf_size = fls(reg);
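
The dw8250 change keeps the fractional-divisor value programmed by firmware: it saves DLF, writes all-ones to discover how many bits are implemented (the read-back feeds fls()), then restores the saved value instead of zeroing it. A runnable sketch of the save/probe/restore idiom against a fake register (register model hypothetical):

#include <stdio.h>

static unsigned int reg_dlf = 0x2a;	/* pretend firmware left 0x2a here */

static unsigned int reg_read(void)    { return reg_dlf; }
static void reg_write(unsigned int v) { reg_dlf = v & 0x3f; /* 6 bits wired */ }

static int fls_u32(unsigned int x)
{
	int n = 0;

	while (x) { n++; x >>= 1; }
	return n;
}

int main(void)
{
	unsigned int old = reg_read();	/* save the firmware value */
	unsigned int probe;

	reg_write(~0U);			/* set every writable bit */
	probe = reg_read();		/* only implemented bits stick */
	reg_write(old);			/* restore, don't clobber with 0 */

	printf("DLF width: %d bits, value preserved: %#x\n",
	       fls_u32(probe), reg_read());
	return 0;
}
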
index 16aeb14..483bb55 100644 (file)
@@ -703,9 +703,6 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
 
 static void serial8250_clear_IER(struct uart_8250_port *up)
 {
-       /* Port locked to synchronize UART_IER access against the console. */
-       lockdep_assert_held_once(&up->port.lock);
-
        if (up->capabilities & UART_CAP_UUE)
                serial_out(up, UART_IER, UART_IER_UUE);
        else
@@ -3278,6 +3275,7 @@ void serial8250_init_port(struct uart_8250_port *up)
 
        spin_lock_init(&port->lock);
        port->ctrl_id = 0;
+       port->pm = NULL;
        port->ops = &serial8250_pops;
        port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
 
index 4d80fae..c569a08 100644 (file)
@@ -1139,8 +1139,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
                unsigned long sr = lpuart32_read(&sport->port, UARTSTAT);
 
                if (sr & (UARTSTAT_PE | UARTSTAT_FE)) {
-                       /* Read DR to clear the error flags */
-                       lpuart32_read(&sport->port, UARTDATA);
+                       /* Clear the error flags */
+                       lpuart32_write(&sport->port, sr, UARTSTAT);
 
                        if (sr & UARTSTAT_PE)
                                sport->port.icount.parity++;
index 444c74e..daaf2a6 100644 (file)
@@ -1681,13 +1681,6 @@ static int qcom_geni_serial_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /*
-        * Set pm_runtime status as ACTIVE so that wakeup_irq gets
-        * enabled/disabled from dev_pm_arm_wake_irq during system
-        * suspend/resume respectively.
-        */
-       pm_runtime_set_active(&pdev->dev);
-
        if (port->wakeup_irq > 0) {
                device_init_wakeup(&pdev->dev, true);
                ret = dev_pm_set_dedicated_wake_irq(&pdev->dev,
index 9faac0f..c74c548 100644 (file)
@@ -16,6 +16,7 @@ struct device;
 
 struct serial_ctrl_device {
        struct device dev;
+       struct ida port_ida;
 };
 
 struct serial_port_device {
index 6ff59c8..3dfcf20 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/container_of.h>
 #include <linux/device.h>
+#include <linux/idr.h>
 #include <linux/module.h>
 #include <linux/serial_core.h>
 #include <linux/slab.h>
 
 static bool serial_base_initialized;
 
+static const struct device_type serial_ctrl_type = {
+       .name = "ctrl",
+};
+
+static const struct device_type serial_port_type = {
+       .name = "port",
+};
+
 static int serial_base_match(struct device *dev, struct device_driver *drv)
 {
-       int len = strlen(drv->name);
+       if (dev->type == &serial_ctrl_type &&
+           str_has_prefix(drv->name, serial_ctrl_type.name))
+               return 1;
 
-       return !strncmp(dev_name(dev), drv->name, len);
+       if (dev->type == &serial_port_type &&
+           str_has_prefix(drv->name, serial_port_type.name))
+               return 1;
+
+       return 0;
 }
 
 static struct bus_type serial_base_bus_type = {
@@ -48,7 +63,8 @@ static int serial_base_device_init(struct uart_port *port,
                                   struct device *parent_dev,
                                   const struct device_type *type,
                                   void (*release)(struct device *dev),
-                                  int id)
+                                  unsigned int ctrl_id,
+                                  unsigned int port_id)
 {
        device_initialize(dev);
        dev->type = type;
@@ -61,12 +77,15 @@ static int serial_base_device_init(struct uart_port *port,
                return -EPROBE_DEFER;
        }
 
-       return dev_set_name(dev, "%s.%s.%d", type->name, dev_name(port->dev), id);
-}
+       if (type == &serial_ctrl_type)
+               return dev_set_name(dev, "%s:%d", dev_name(port->dev), ctrl_id);
 
-static const struct device_type serial_ctrl_type = {
-       .name = "ctrl",
-};
+       if (type == &serial_port_type)
+               return dev_set_name(dev, "%s:%d.%d", dev_name(port->dev),
+                                   ctrl_id, port_id);
+
+       return -EINVAL;
+}
 
 static void serial_base_ctrl_release(struct device *dev)
 {
@@ -81,6 +100,7 @@ void serial_base_ctrl_device_remove(struct serial_ctrl_device *ctrl_dev)
                return;
 
        device_del(&ctrl_dev->dev);
+       put_device(&ctrl_dev->dev);
 }
 
 struct serial_ctrl_device *serial_base_ctrl_add(struct uart_port *port,
@@ -93,10 +113,12 @@ struct serial_ctrl_device *serial_base_ctrl_add(struct uart_port *port,
        if (!ctrl_dev)
                return ERR_PTR(-ENOMEM);
 
+       ida_init(&ctrl_dev->port_ida);
+
        err = serial_base_device_init(port, &ctrl_dev->dev,
                                      parent, &serial_ctrl_type,
                                      serial_base_ctrl_release,
-                                     port->ctrl_id);
+                                     port->ctrl_id, 0);
        if (err)
                goto err_put_device;
 
@@ -112,10 +134,6 @@ err_put_device:
        return ERR_PTR(err);
 }
 
-static const struct device_type serial_port_type = {
-       .name = "port",
-};
-
 static void serial_base_port_release(struct device *dev)
 {
        struct serial_port_device *port_dev = to_serial_base_port_device(dev);
@@ -127,16 +145,31 @@ struct serial_port_device *serial_base_port_add(struct uart_port *port,
                                                struct serial_ctrl_device *ctrl_dev)
 {
        struct serial_port_device *port_dev;
+       int min = 0, max = -1;  /* Use -1 for max to apply IDA defaults */
        int err;
 
        port_dev = kzalloc(sizeof(*port_dev), GFP_KERNEL);
        if (!port_dev)
                return ERR_PTR(-ENOMEM);
 
+       /* Device driver specified port_id vs automatic assignment? */
+       if (port->port_id) {
+               min = port->port_id;
+               max = port->port_id;
+       }
+
+       err = ida_alloc_range(&ctrl_dev->port_ida, min, max, GFP_KERNEL);
+       if (err < 0) {
+               kfree(port_dev);
+               return ERR_PTR(err);
+       }
+
+       port->port_id = err;
+
        err = serial_base_device_init(port, &port_dev->dev,
                                      &ctrl_dev->dev, &serial_port_type,
                                      serial_base_port_release,
-                                     port->line);
+                                     port->ctrl_id, port->port_id);
        if (err)
                goto err_put_device;
 
@@ -150,16 +183,25 @@ struct serial_port_device *serial_base_port_add(struct uart_port *port,
 
 err_put_device:
        put_device(&port_dev->dev);
+       ida_free(&ctrl_dev->port_ida, port->port_id);
 
        return ERR_PTR(err);
 }
 
 void serial_base_port_device_remove(struct serial_port_device *port_dev)
 {
+       struct serial_ctrl_device *ctrl_dev;
+       struct device *parent;
+
        if (!port_dev)
                return;
 
+       parent = port_dev->dev.parent;
+       ctrl_dev = to_serial_base_ctrl_device(parent);
+
        device_del(&port_dev->dev);
+       ida_free(&ctrl_dev->port_ida, port_dev->port->port_id);
+       put_device(&port_dev->dev);
 }
 
 static int serial_base_init(void)
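
The port IDs above come from a per-controller IDA: when a driver supplies port->port_id the range is pinned with min == max, otherwise min = 0, max = -1 lets the allocator hand out the lowest free ID. A kernel-context sketch of that selection logic (not a standalone program; helper name hypothetical):

static int pick_port_id(struct ida *ida, unsigned int wanted)
{
	int min = 0, max = -1;	/* IDA defaults: any free ID */
	int id;

	if (wanted) {		/* driver asked for a fixed ID */
		min = wanted;
		max = wanted;
	}

	id = ida_alloc_range(ida, min, max, GFP_KERNEL);
	if (id < 0)
		return id;	/* e.g. -ENOSPC if the fixed ID is taken */

	return id;		/* caller must ida_free() this on teardown */
}
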
index 7c94579..8b7a42e 100644 (file)
@@ -590,7 +590,7 @@ static void sci_start_tx(struct uart_port *port)
            dma_submit_error(s->cookie_tx)) {
                if (s->cfg->regtype == SCIx_RZ_SCIFA_REGTYPE)
                        /* Switch irq from SCIF to DMA */
-                       disable_irq(s->irqs[SCIx_TXI_IRQ]);
+                       disable_irq_nosync(s->irqs[SCIx_TXI_IRQ]);
 
                s->cookie_tx = 0;
                schedule_work(&s->work_tx);
index 1f565a2..a19db49 100644 (file)
@@ -811,7 +811,7 @@ static void sifive_serial_console_write(struct console *co, const char *s,
        local_irq_restore(flags);
 }
 
-static int __init sifive_serial_console_setup(struct console *co, char *options)
+static int sifive_serial_console_setup(struct console *co, char *options)
 {
        struct sifive_serial_port *ssp;
        int baud = SIFIVE_DEFAULT_BAUD_RATE;
index 404230c..0a370b9 100644 (file)
@@ -59,7 +59,7 @@ static int firmware_loaded;
 /* #define LOOPBACK */
 
 /* The major and minor device numbers are defined in
- * http://www.lanana.org/docs/device-list/devices-2.6+.txt.  For the QE
+ * Documentation/admin-guide/devices.txt.  For the QE
  * UART, we have major number 204 and minor numbers 46 - 49, which are the
  * same as for the CPM2.  This decision was made because no Freescale part
  * has both a CPM and a QE.
index 3959efc..63db04b 100644 (file)
@@ -2285,7 +2285,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
        char ch, mbz = 0;
        struct tty_ldisc *ld;
 
-       if (!tty_legacy_tiocsti)
+       if (!tty_legacy_tiocsti && !capable(CAP_SYS_ADMIN))
                return -EIO;
 
        if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
index 6fb0e00..386674e 100644 (file)
@@ -580,7 +580,6 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
 {
        struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
        struct utp_transfer_req_desc *utrd;
-       u32 mask = hwq->max_entries - 1;
        __le64  cmd_desc_base_addr;
        bool ret = false;
        u64 addr, match;
@@ -608,7 +607,10 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
                        ret = true;
                        goto out;
                }
-               sq_head_slot = (sq_head_slot + 1) & mask;
+
+               sq_head_slot++;
+               if (sq_head_slot == hwq->max_entries)
+                       sq_head_slot = 0;
        }
 
 out:
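
The ufshcd-mcq hunk replaces `(sq_head_slot + 1) & (max_entries - 1)` with an explicit compare-and-reset: the mask trick only wraps correctly when the queue depth is a power of two, while the comparison works for any depth. A runnable demonstration (depth 6 chosen to expose the difference):

#include <stdio.h>

int main(void)
{
	unsigned int depth = 6;		/* not a power of two */
	unsigned int mask = depth - 1;	/* 5 = 0b101: not a valid mask */
	unsigned int a = 0, b = 0;

	for (int i = 0; i < 8; i++) {
		a = (a + 1) & mask;	/* broken for depth 6: cycles 1,0,1,0,... */

		b++;			/* correct for any depth */
		if (b == depth)
			b = 0;

		printf("masked=%u compared=%u\n", a, b);
	}
	return 0;
}
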
index 983fae8..1294467 100644 (file)
@@ -8520,6 +8520,41 @@ out:
        return ret;
 }
 
+static void ufshcd_set_timestamp_attr(struct ufs_hba *hba)
+{
+       int err;
+       struct ufs_query_req *request = NULL;
+       struct ufs_query_res *response = NULL;
+       struct ufs_dev_info *dev_info = &hba->dev_info;
+       struct utp_upiu_query_v4_0 *upiu_data;
+
+       if (dev_info->wspecversion < 0x400)
+               return;
+
+       ufshcd_hold(hba);
+
+       mutex_lock(&hba->dev_cmd.lock);
+
+       ufshcd_init_query(hba, &request, &response,
+                         UPIU_QUERY_OPCODE_WRITE_ATTR,
+                         QUERY_ATTR_IDN_TIMESTAMP, 0, 0);
+
+       request->query_func = UPIU_QUERY_FUNC_STANDARD_WRITE_REQUEST;
+
+       upiu_data = (struct utp_upiu_query_v4_0 *)&request->upiu_req;
+
+       put_unaligned_be64(ktime_get_real_ns(), &upiu_data->osf3);
+
+       err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_QUERY, QUERY_REQ_TIMEOUT);
+
+       if (err)
+               dev_err(hba->dev, "%s: failed to set timestamp %d\n",
+                       __func__, err);
+
+       mutex_unlock(&hba->dev_cmd.lock);
+       ufshcd_release(hba);
+}
+
 /**
  * ufshcd_add_lus - probe and add UFS logical units
  * @hba: per-adapter instance
@@ -8708,6 +8743,8 @@ static int ufshcd_device_init(struct ufs_hba *hba, bool init_dev_params)
        ufshcd_set_ufs_dev_active(hba);
        ufshcd_force_reset_auto_bkops(hba);
 
+       ufshcd_set_timestamp_attr(hba);
+
        /* Gear up to HS gear if supported */
        if (hba->max_pwr_info.is_valid) {
                /*
@@ -9749,6 +9786,7 @@ static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
                ret = ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE);
                if (ret)
                        goto set_old_link_state;
+               ufshcd_set_timestamp_attr(hba);
        }
 
        if (ufshcd_keep_autobkops_enabled_except_suspend(hba))
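
ufshcd_set_timestamp_attr() ships the host clock to a UFS 4.0+ device as a big-endian 64-bit attribute via put_unaligned_be64(). A runnable userspace model of that byte packing (the offset into the UPIU and the OSF3 field are device-protocol details the sketch only approximates):

#include <stdio.h>
#include <stdint.h>
#include <time.h>

/* Store a 64-bit value big-endian at an arbitrary (unaligned) address. */
static void put_be64(uint64_t v, uint8_t *p)
{
	for (int i = 7; i >= 0; i--) {
		p[i] = (uint8_t)v;
		v >>= 8;
	}
}

int main(void)
{
	uint8_t upiu[16] = { 0 };
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);	/* stands in for ktime_get_real_ns() */
	put_be64((uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec, upiu + 3);

	for (int i = 0; i < 11; i++)
		printf("%02x ", upiu[i]);
	putchar('\n');
	return 0;
}
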
index 16624ba..580c8d0 100644 (file)
@@ -72,6 +72,7 @@ config SCSI_UFS_QCOM
 config SCSI_UFS_MEDIATEK
        tristate "Mediatek specific hooks to UFS controller platform driver"
        depends on SCSI_UFSHCD_PLATFORM && ARCH_MEDIATEK
+       depends on RESET_CONTROLLER
        select PHY_MTK_UFS
        select RESET_TI_SYSCON
        help
index 8d6fd4c..c1557d2 100644 (file)
@@ -321,7 +321,7 @@ static void ufs_qcom_select_unipro_mode(struct ufs_qcom_host *host)
                   ufs_qcom_cap_qunipro(host) ? QUNIPRO_SEL : 0,
                   REG_UFS_CFG1);
 
-       if (host->hw_ver.major == 0x05)
+       if (host->hw_ver.major >= 0x05)
                ufshcd_rmwl(host->hba, QUNIPRO_G4_SEL, 0, REG_UFS_CFG0);
 
        /* make sure above configuration is applied before we return */
index f8a5e79..ab0652d 100644 (file)
@@ -359,7 +359,7 @@ static int ufs_renesas_init(struct ufs_hba *hba)
 {
        struct ufs_renesas_priv *priv;
 
-       priv = devm_kmalloc(hba->dev, sizeof(*priv), GFP_KERNEL);
+       priv = devm_kzalloc(hba->dev, sizeof(*priv), GFP_KERNEL);
        if (!priv)
                return -ENOMEM;
        ufshcd_set_variant(hba, priv);
index ea19253..aa0111b 100644 (file)
@@ -3015,12 +3015,14 @@ static int cdns3_gadget_udc_stop(struct usb_gadget *gadget)
 static int cdns3_gadget_check_config(struct usb_gadget *gadget)
 {
        struct cdns3_device *priv_dev = gadget_to_cdns3_device(gadget);
+       struct cdns3_endpoint *priv_ep;
        struct usb_ep *ep;
        int n_in = 0;
        int total;
 
        list_for_each_entry(ep, &gadget->ep_list, ep_list) {
-               if (ep->claimed && (ep->address & USB_DIR_IN))
+               priv_ep = ep_to_cdns3_ep(ep);
+               if ((priv_ep->flags & EP_CLAIMED) && (ep->address & USB_DIR_IN))
                        n_in++;
        }
 
index 766005d..501e8bc 100644 (file)
@@ -42,6 +42,7 @@ struct usb_conn_info {
 
        struct power_supply_desc desc;
        struct power_supply *charger;
+       bool initial_detection;
 };
 
 /*
@@ -86,11 +87,13 @@ static void usb_conn_detect_cable(struct work_struct *work)
        dev_dbg(info->dev, "role %s -> %s, gpios: id %d, vbus %d\n",
                usb_role_string(info->last_role), usb_role_string(role), id, vbus);
 
-       if (info->last_role == role) {
+       if (!info->initial_detection && info->last_role == role) {
                dev_warn(info->dev, "repeated role: %s\n", usb_role_string(role));
                return;
        }
 
+       info->initial_detection = false;
+
        if (info->last_role == USB_ROLE_HOST && info->vbus)
                regulator_disable(info->vbus);
 
@@ -258,6 +261,7 @@ static int usb_conn_probe(struct platform_device *pdev)
        device_set_wakeup_capable(&pdev->dev, true);
 
        /* Perform initial detection */
+       info->initial_detection = true;
        usb_conn_queue_dwork(info, 0);
 
        return 0;
index 1a16a8b..4f68f6e 100644 (file)
@@ -2642,21 +2642,21 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
                snoop(&dev->dev, "%s: CONTROL\n", __func__);
                ret = proc_control(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_BULK:
                snoop(&dev->dev, "%s: BULK\n", __func__);
                ret = proc_bulk(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_RESETEP:
                snoop(&dev->dev, "%s: RESETEP\n", __func__);
                ret = proc_resetep(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_RESET:
@@ -2668,7 +2668,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
                snoop(&dev->dev, "%s: CLEAR_HALT\n", __func__);
                ret = proc_clearhalt(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_GETDRIVER:
@@ -2695,7 +2695,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
                snoop(&dev->dev, "%s: SUBMITURB\n", __func__);
                ret = proc_submiturb(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
 #ifdef CONFIG_COMPAT
@@ -2703,14 +2703,14 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
                snoop(&dev->dev, "%s: CONTROL32\n", __func__);
                ret = proc_control_compat(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_BULK32:
                snoop(&dev->dev, "%s: BULK32\n", __func__);
                ret = proc_bulk_compat(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_DISCSIGNAL32:
@@ -2722,7 +2722,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
                snoop(&dev->dev, "%s: SUBMITURB32\n", __func__);
                ret = proc_submiturb_compat(ps, p);
                if (ret >= 0)
-                       inode->i_mtime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                break;
 
        case USBDEVFS_IOCTL32:
index 934b3d9..15e9bd1 100644 (file)
@@ -436,6 +436,10 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* novation SoundControl XL */
        { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
 
+       /* Focusrite Scarlett Solo USB */
+       { USB_DEVICE(0x1235, 0x8211), .driver_info =
+                       USB_QUIRK_DISCONNECT_SUSPEND },
+
        /* Huawei 4G LTE module */
        { USB_DEVICE(0x12d1, 0x15bb), .driver_info =
                        USB_QUIRK_DISCONNECT_SUSPEND },
index f6689b7..9c6bf05 100644 (file)
@@ -277,9 +277,9 @@ int dwc3_core_soft_reset(struct dwc3 *dwc)
        /*
         * We're resetting only the device side because, if we're in host mode,
         * XHCI driver will reset the host block. If dwc3 was configured for
-        * host-only mode, then we can return early.
+        * host-only mode or the current role is host, then we can return early.
         */
-       if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
+       if (dwc->dr_mode == USB_DR_MODE_HOST || dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
                return 0;
 
        reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -1209,22 +1209,6 @@ static int dwc3_core_init(struct dwc3 *dwc)
                dwc3_writel(dwc->regs, DWC3_GUCTL1, reg);
        }
 
-       if (dwc->dr_mode == USB_DR_MODE_HOST ||
-           dwc->dr_mode == USB_DR_MODE_OTG) {
-               reg = dwc3_readl(dwc->regs, DWC3_GUCTL);
-
-               /*
-                * Enable Auto retry Feature to make the controller operating in
-                * Host mode on seeing transaction errors(CRC errors or internal
-                * overrun scenerios) on IN transfers to reply to the device
-                * with a non-terminating retry ACK (i.e, an ACK transcation
-                * packet with Retry=1 & Nump != 0)
-                */
-               reg |= DWC3_GUCTL_HSTINAUTORETRY;
-
-               dwc3_writel(dwc->regs, DWC3_GUCTL, reg);
-       }
-
        /*
         * Must config both number of packets and max burst settings to enable
         * RX and/or TX threshold.
index 8b1295e..a69ac67 100644 (file)
 #define DWC3_GCTL_GBLHIBERNATIONEN     BIT(1)
 #define DWC3_GCTL_DSBLCLKGTNG          BIT(0)
 
-/* Global User Control Register */
-#define DWC3_GUCTL_HSTINAUTORETRY      BIT(14)
-
 /* Global User Control 1 Register */
 #define DWC3_GUCTL1_DEV_DECOUPLE_L1L2_EVT      BIT(31)
 #define DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS     BIT(28)
index 44a04c9..6604845 100644 (file)
@@ -233,10 +233,12 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc,
 
                        /*
                         * A lot of BYT devices lack ACPI resource entries for
-                        * the GPIOs, add a fallback mapping to the reference
+                        * the GPIOs. If the ACPI entry for the GPIO controller
+                        * is present, add a fallback mapping to the reference
                         * design GPIOs which all boards seem to use.
                         */
-                       gpiod_add_lookup_table(&platform_bytcr_gpios);
+                       if (acpi_dev_present("INT33FC", NULL, -1))
+                               gpiod_add_lookup_table(&platform_bytcr_gpios);
 
                        /*
                         * These GPIOs will turn on the USB2 PHY. Note that we have to
index 5fd0671..858fe4c 100644 (file)
@@ -4455,9 +4455,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
        u32 count;
 
        if (pm_runtime_suspended(dwc->dev)) {
+               dwc->pending_events = true;
+               /*
+                * Trigger runtime resume. The get() function will be balanced
+                * after processing the pending events in
+                * dwc3_process_pending_events().
+                */
                pm_runtime_get(dwc->dev);
                disable_irq_nosync(dwc->irq_gadget);
-               dwc->pending_events = true;
                return IRQ_HANDLED;
        }
 
@@ -4718,6 +4723,8 @@ void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
 {
        if (dwc->pending_events) {
                dwc3_interrupt(dwc->irq_gadget, dwc->ev_buf);
+               dwc3_thread_interrupt(dwc->irq_gadget, dwc->ev_buf);
+               pm_runtime_put(dwc->dev);
                dwc->pending_events = false;
                enable_irq(dwc->irq_gadget);
        }
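
The dwc3 hunks pair a pm_runtime_get() taken in the (runtime-suspended) hard-IRQ path with a pm_runtime_put() issued only after the deferred events are handled, and they invoke the threaded handler explicitly because the hard IRQ returned IRQ_HANDLED without waking the thread. A kernel-context sketch of that get/put pairing (struct ctrl, handle_events(), and the field names are hypothetical; not a standalone program):

/* Hard IRQ while runtime-suspended: defer, but keep the device powered. */
static irqreturn_t hw_irq(int irq, void *data)
{
	struct ctrl *c = data;

	if (pm_runtime_suspended(c->dev)) {
		c->pending = true;
		pm_runtime_get(c->dev);	/* async; balanced in the resume path */
		disable_irq_nosync(irq);
		return IRQ_HANDLED;
	}
	return IRQ_WAKE_THREAD;
}

/* Called once the device has resumed. */
static void process_pending(struct ctrl *c)
{
	if (c->pending) {
		handle_events(c);	/* what the thread fn would have done */
		pm_runtime_put(c->dev);	/* balance the get() above */
		c->pending = false;
		enable_irq(c->irq);
	}
}
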
index 1b34891..dd9b904 100644 (file)
@@ -1125,6 +1125,10 @@ int usb_add_config(struct usb_composite_dev *cdev,
                goto done;
 
        status = bind(config);
+
+       if (status == 0)
+               status = usb_gadget_check_config(cdev->gadget);
+
        if (status < 0) {
                while (!list_empty(&config->functions)) {
                        struct usb_function             *f;
index f41a385..6e9ef35 100644 (file)
@@ -1377,7 +1377,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
        inode = new_inode(sb);
 
        if (inode) {
-               struct timespec64 ts = current_time(inode);
+               struct timespec64 ts = inode_set_ctime_current(inode);
 
                inode->i_ino     = get_next_ino();
                inode->i_mode    = perms->mode;
@@ -1385,7 +1385,6 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
                inode->i_gid     = perms->gid;
                inode->i_atime   = ts;
                inode->i_mtime   = ts;
-               inode->i_ctime   = ts;
                inode->i_private = data;
                if (fops)
                        inode->i_fop = fops;
index 28249d0..ce9e31f 100644 (file)
@@ -1969,8 +1969,7 @@ gadgetfs_make_inode (struct super_block *sb,
                inode->i_mode = mode;
                inode->i_uid = make_kuid(&init_user_ns, default_uid);
                inode->i_gid = make_kgid(&init_user_ns, default_gid);
-               inode->i_atime = inode->i_mtime = inode->i_ctime
-                               = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inode->i_private = data;
                inode->i_fop = fops;
        }
index 2acece1..e549022 100644 (file)
@@ -310,13 +310,15 @@ static int gadget_bind(struct usb_gadget *gadget,
        dev->eps_num = i;
        spin_unlock_irqrestore(&dev->lock, flags);
 
-       /* Matches kref_put() in gadget_unbind(). */
-       kref_get(&dev->count);
-
        ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL);
-       if (ret < 0)
+       if (ret < 0) {
                dev_err(&gadget->dev, "failed to queue event\n");
+               set_gadget_data(gadget, NULL);
+               return ret;
+       }
 
+       /* Matches kref_put() in gadget_unbind(). */
+       kref_get(&dev->count);
        return ret;
 }
 
index 59188ea..7d49d8a 100644 (file)
@@ -822,6 +822,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_disconnect);
  * usb_gadget_activate() is called.  For example, user mode components may
  * need to be activated before the system can talk to hosts.
  *
+ * This routine may sleep; it must not be called in interrupt context
+ * (such as from within a gadget driver's disconnect() callback).
+ *
  * Returns zero on success, else negative errno.
  */
 int usb_gadget_deactivate(struct usb_gadget *gadget)
@@ -860,6 +863,8 @@ EXPORT_SYMBOL_GPL(usb_gadget_deactivate);
  * This routine activates gadget which was previously deactivated with
  * usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed.
  *
+ * This routine may sleep; it must not be called in interrupt context.
+ *
  * Returns zero on success, else negative errno.
  */
 int usb_gadget_activate(struct usb_gadget *gadget)
@@ -878,7 +883,6 @@ int usb_gadget_activate(struct usb_gadget *gadget)
         */
        if (gadget->connected)
                ret = usb_gadget_connect_locked(gadget);
-       mutex_unlock(&gadget->udc->connect_lock);
 
 unlock:
        mutex_unlock(&gadget->udc->connect_lock);
@@ -1639,7 +1643,11 @@ static void gadget_unbind_driver(struct device *dev)
        usb_gadget_disable_async_callbacks(udc);
        if (gadget->irq)
                synchronize_irq(gadget->irq);
+       mutex_unlock(&udc->connect_lock);
+
        udc->driver->unbind(gadget);
+
+       mutex_lock(&udc->connect_lock);
        usb_gadget_udc_stop_locked(udc);
        mutex_unlock(&udc->connect_lock);
 
index 83eaa65..df6028f 100644 (file)
@@ -3718,15 +3718,15 @@ static int tegra_xudc_powerdomain_init(struct tegra_xudc *xudc)
        int err;
 
        xudc->genpd_dev_device = dev_pm_domain_attach_by_name(dev, "dev");
-       if (IS_ERR_OR_NULL(xudc->genpd_dev_device)) {
-               err = PTR_ERR(xudc->genpd_dev_device) ? : -ENODATA;
+       if (IS_ERR(xudc->genpd_dev_device)) {
+               err = PTR_ERR(xudc->genpd_dev_device);
                dev_err(dev, "failed to get device power domain: %d\n", err);
                return err;
        }
 
        xudc->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "ss");
-       if (IS_ERR_OR_NULL(xudc->genpd_dev_ss)) {
-               err = PTR_ERR(xudc->genpd_dev_ss) ? : -ENODATA;
+       if (IS_ERR(xudc->genpd_dev_ss)) {
+               err = PTR_ERR(xudc->genpd_dev_ss);
                dev_err(dev, "failed to get SuperSpeed power domain: %d\n", err);
                return err;
        }
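
Both powerdomain fixes in this series drop IS_ERR_OR_NULL() in favor of IS_ERR(), treating a NULL attach result as success rather than as an error to decode. A runnable userspace model of the kernel's ERR_PTR encoding, which is why `PTR_ERR(NULL)` yields 0 and why the removed code needed the `?: -ENODATA` fallback:

#include <stdio.h>

#define MAX_ERRNO	4095

/* Kernel-style encoding: small negative errnos live at the very top of
 * the address range, so one pointer can carry a value or an error. */
static inline void *ERR_PTR(long err)      { return (void *)err; }
static inline long PTR_ERR(const void *p)  { return (long)p; }
static inline int IS_ERR(const void *p)
{
	return (unsigned long)p >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	void *err = ERR_PTR(-19);	/* -ENODATA */
	void *none = NULL;		/* valid "nothing attached" result */

	printf("err ptr: IS_ERR=%d PTR_ERR=%ld\n", IS_ERR(err), PTR_ERR(err));
	printf("NULL:    IS_ERR=%d (success, not an error)\n", IS_ERR(none));
	return 0;
}
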
index b9ce8d8..b805c4b 100644 (file)
@@ -672,7 +672,13 @@ ohci_hcd_at91_drv_resume(struct device *dev)
        else
                at91_start_clock(ohci_at91);
 
-       ohci_resume(hcd, false);
+       /*
+        * According to the comment in ohci_hcd_at91_drv_suspend()
+        * we need to do a reset if the 48Mhz clock was stopped,
+        * that is, if ohci_at91->wakeup is clear. Tell ohci_resume()
+        * to reset in this case by setting its "hibernated" flag.
+        */
+       ohci_resume(hcd, !ohci_at91->wakeup);
 
        return 0;
 }
index 51d9d4d..bbdf1b0 100644 (file)
@@ -586,6 +586,7 @@ static int xhci_mtk_probe(struct platform_device *pdev)
        }
 
        device_init_wakeup(dev, true);
+       dma_set_max_seg_size(dev, UINT_MAX);
 
        xhci = hcd_to_xhci(hcd);
        xhci->main_hcd = hcd;
index c6742ba..b9ae5c2 100644 (file)
@@ -479,10 +479,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                        pdev->device == 0x3432)
                xhci->quirks |= XHCI_BROKEN_STREAMS;
 
-       if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) {
+       if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483)
                xhci->quirks |= XHCI_LPM_SUPPORT;
-               xhci->quirks |= XHCI_EP_CTX_BROKEN_DCS;
-       }
 
        if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
                pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) {
index 646ff12..1dde53f 100644 (file)
@@ -626,11 +626,8 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
        struct xhci_ring *ep_ring;
        struct xhci_command *cmd;
        struct xhci_segment *new_seg;
-       struct xhci_segment *halted_seg = NULL;
        union xhci_trb *new_deq;
        int new_cycle;
-       union xhci_trb *halted_trb;
-       int index = 0;
        dma_addr_t addr;
        u64 hw_dequeue;
        bool cycle_found = false;
@@ -668,27 +665,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
        hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id);
        new_seg = ep_ring->deq_seg;
        new_deq = ep_ring->dequeue;
-
-       /*
-        * Quirk: xHC write-back of the DCS field in the hardware dequeue
-        * pointer is wrong - use the cycle state of the TRB pointed to by
-        * the dequeue pointer.
-        */
-       if (xhci->quirks & XHCI_EP_CTX_BROKEN_DCS &&
-           !(ep->ep_state & EP_HAS_STREAMS))
-               halted_seg = trb_in_td(xhci, td->start_seg,
-                                      td->first_trb, td->last_trb,
-                                      hw_dequeue & ~0xf, false);
-       if (halted_seg) {
-               index = ((dma_addr_t)(hw_dequeue & ~0xf) - halted_seg->dma) /
-                        sizeof(*halted_trb);
-               halted_trb = &halted_seg->trbs[index];
-               new_cycle = halted_trb->generic.field[3] & 0x1;
-               xhci_dbg(xhci, "Endpoint DCS = %d TRB index = %d cycle = %d\n",
-                        (u8)(hw_dequeue & 0x1), index, new_cycle);
-       } else {
-               new_cycle = hw_dequeue & 0x1;
-       }
+       new_cycle = hw_dequeue & 0x1;
 
        /*
         * We want to find the pointer, segment and cycle state of the new trb
index 6ca8a37..4693d83 100644 (file)
@@ -1145,15 +1145,15 @@ static int tegra_xusb_powerdomain_init(struct device *dev,
        int err;
 
        tegra->genpd_dev_host = dev_pm_domain_attach_by_name(dev, "xusb_host");
-       if (IS_ERR_OR_NULL(tegra->genpd_dev_host)) {
-               err = PTR_ERR(tegra->genpd_dev_host) ? : -ENODATA;
+       if (IS_ERR(tegra->genpd_dev_host)) {
+               err = PTR_ERR(tegra->genpd_dev_host);
                dev_err(dev, "failed to get host pm-domain: %d\n", err);
                return err;
        }
 
        tegra->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "xusb_ss");
-       if (IS_ERR_OR_NULL(tegra->genpd_dev_ss)) {
-               err = PTR_ERR(tegra->genpd_dev_ss) ? : -ENODATA;
+       if (IS_ERR(tegra->genpd_dev_ss)) {
+               err = PTR_ERR(tegra->genpd_dev_ss);
                dev_err(dev, "failed to get superspeed pm-domain: %d\n", err);
                return err;
        }
index 986d658..36b6e9f 100644 (file)
@@ -77,7 +77,7 @@ static int ehset_probe(struct usb_interface *intf,
        switch (test_pid) {
        case TEST_SE0_NAK_PID:
                ret = ehset_prepare_port_for_testing(hub_udev, portnum);
-               if (!ret)
+               if (ret < 0)
                        break;
                ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
                                           USB_RT_PORT, USB_PORT_FEAT_TEST,
@@ -86,7 +86,7 @@ static int ehset_probe(struct usb_interface *intf,
                break;
        case TEST_J_PID:
                ret = ehset_prepare_port_for_testing(hub_udev, portnum);
-               if (!ret)
+               if (ret < 0)
                        break;
                ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
                                           USB_RT_PORT, USB_PORT_FEAT_TEST,
@@ -95,7 +95,7 @@ static int ehset_probe(struct usb_interface *intf,
                break;
        case TEST_K_PID:
                ret = ehset_prepare_port_for_testing(hub_udev, portnum);
-               if (!ret)
+               if (ret < 0)
                        break;
                ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
                                           USB_RT_PORT, USB_PORT_FEAT_TEST,
@@ -104,7 +104,7 @@ static int ehset_probe(struct usb_interface *intf,
                break;
        case TEST_PACKET_PID:
                ret = ehset_prepare_port_for_testing(hub_udev, portnum);
-               if (!ret)
+               if (ret < 0)
                        break;
                ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
                                           USB_RT_PORT, USB_PORT_FEAT_TEST,
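
The ehset fix inverts a success test: ehset_prepare_port_for_testing() follows the usual 0-on-success convention, so `if (!ret) break;` bailed out precisely when preparation worked and the SET_FEATURE request was never sent. A runnable distillation of the bug (helper names hypothetical):

#include <stdio.h>

static int prepare(void) { return 0; }	/* 0 means success */

static void buggy(void)
{
	int ret = prepare();

	if (!ret)		/* taken on success: the test never runs */
		return;
	printf("buggy: test ran\n");
}

static void fixed(void)
{
	int ret = prepare();

	if (ret < 0)		/* only a negative errno aborts */
		return;
	printf("fixed: test ran\n");
}

int main(void)
{
	buggy();	/* prints nothing */
	fixed();	/* prints "fixed: test ran" */
	return 0;
}
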
index 288a96a..8ac98e6 100644 (file)
@@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb);
 #define QUECTEL_PRODUCT_EM061K_LTA             0x0123
 #define QUECTEL_PRODUCT_EM061K_LMS             0x0124
 #define QUECTEL_PRODUCT_EC25                   0x0125
+#define QUECTEL_PRODUCT_EM060K_128             0x0128
 #define QUECTEL_PRODUCT_EG91                   0x0191
 #define QUECTEL_PRODUCT_EG95                   0x0195
 #define QUECTEL_PRODUCT_BG96                   0x0296
@@ -268,6 +269,7 @@ static void option_instat_callback(struct urb *urb);
 #define QUECTEL_PRODUCT_RM520N                 0x0801
 #define QUECTEL_PRODUCT_EC200U                 0x0901
 #define QUECTEL_PRODUCT_EC200S_CN              0x6002
+#define QUECTEL_PRODUCT_EC200A                 0x6005
 #define QUECTEL_PRODUCT_EM061K_LWW             0x6008
 #define QUECTEL_PRODUCT_EM061K_LCN             0x6009
 #define QUECTEL_PRODUCT_EC200T                 0x6026
@@ -1197,6 +1199,9 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) },
+       { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) },
+       { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) },
+       { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) },
@@ -1225,6 +1230,7 @@ static const struct usb_device_id option_ids[] = {
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */
          .driver_info = ZLP },
+       { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200A, 0xff, 0, 0) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) },
        { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) },
index 4c67478..24b8772 100644 (file)
@@ -38,16 +38,6 @@ static struct usb_serial_driver vendor##_device = {          \
        { USB_DEVICE(0x0a21, 0x8001) }  /* MMT-7305WW */
 DEVICE(carelink, CARELINK_IDS);
 
-/* ZIO Motherboard USB driver */
-#define ZIO_IDS()                      \
-       { USB_DEVICE(0x1CBE, 0x0103) }
-DEVICE(zio, ZIO_IDS);
-
-/* Funsoft Serial USB driver */
-#define FUNSOFT_IDS()                  \
-       { USB_DEVICE(0x1404, 0xcddc) }
-DEVICE(funsoft, FUNSOFT_IDS);
-
 /* Infineon Flashloader driver */
 #define FLASHLOADER_IDS()              \
        { USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
@@ -55,6 +45,11 @@ DEVICE(funsoft, FUNSOFT_IDS);
        { USB_DEVICE(0x8087, 0x0801) }
 DEVICE(flashloader, FLASHLOADER_IDS);
 
+/* Funsoft Serial USB driver */
+#define FUNSOFT_IDS()                  \
+       { USB_DEVICE(0x1404, 0xcddc) }
+DEVICE(funsoft, FUNSOFT_IDS);
+
 /* Google Serial USB SubClass */
 #define GOOGLE_IDS()                                           \
        { USB_VENDOR_AND_INTERFACE_INFO(0x18d1,                 \
@@ -63,16 +58,21 @@ DEVICE(flashloader, FLASHLOADER_IDS);
                                        0x01) }
 DEVICE(google, GOOGLE_IDS);
 
+/* HP4x (48/49) Generic Serial driver */
+#define HP4X_IDS()                     \
+       { USB_DEVICE(0x03f0, 0x0121) }
+DEVICE(hp4x, HP4X_IDS);
+
+/* KAUFMANN RKS+CAN VCP */
+#define KAUFMANN_IDS()                 \
+       { USB_DEVICE(0x16d0, 0x0870) }
+DEVICE(kaufmann, KAUFMANN_IDS);
+
 /* Libtransistor USB console */
 #define LIBTRANSISTOR_IDS()                    \
        { USB_DEVICE(0x1209, 0x8b00) }
 DEVICE(libtransistor, LIBTRANSISTOR_IDS);
 
-/* ViVOpay USB Serial Driver */
-#define VIVOPAY_IDS()                  \
-       { USB_DEVICE(0x1d5f, 0x1004) }  /* ViVOpay 8800 */
-DEVICE(vivopay, VIVOPAY_IDS);
-
 /* Motorola USB Phone driver */
 #define MOTO_IDS()                     \
        { USB_DEVICE(0x05c6, 0x3197) }, /* unknown Motorola phone */    \
@@ -101,10 +101,10 @@ DEVICE(nokia, NOKIA_IDS);
        { USB_DEVICE(0x09d7, 0x0100) }  /* NovAtel FlexPack GPS */
 DEVICE_N(novatel_gps, NOVATEL_IDS, 3);
 
-/* HP4x (48/49) Generic Serial driver */
-#define HP4X_IDS()                     \
-       { USB_DEVICE(0x03f0, 0x0121) }
-DEVICE(hp4x, HP4X_IDS);
+/* Siemens USB/MPI adapter */
+#define SIEMENS_IDS()                  \
+       { USB_DEVICE(0x908, 0x0004) }
+DEVICE(siemens_mpi, SIEMENS_IDS);
 
 /* Suunto ANT+ USB Driver */
 #define SUUNTO_IDS()                   \
@@ -112,45 +112,52 @@ DEVICE(hp4x, HP4X_IDS);
        { USB_DEVICE(0x0fcf, 0x1009) } /* Dynastream ANT USB-m Stick */
 DEVICE(suunto, SUUNTO_IDS);
 
-/* Siemens USB/MPI adapter */
-#define SIEMENS_IDS()                  \
-       { USB_DEVICE(0x908, 0x0004) }
-DEVICE(siemens_mpi, SIEMENS_IDS);
+/* ViVOpay USB Serial Driver */
+#define VIVOPAY_IDS()                  \
+       { USB_DEVICE(0x1d5f, 0x1004) }  /* ViVOpay 8800 */
+DEVICE(vivopay, VIVOPAY_IDS);
+
+/* ZIO Motherboard USB driver */
+#define ZIO_IDS()                      \
+       { USB_DEVICE(0x1CBE, 0x0103) }
+DEVICE(zio, ZIO_IDS);
 
 /* All of the above structures mushed into two lists */
 static struct usb_serial_driver * const serial_drivers[] = {
        &carelink_device,
-       &zio_device,
-       &funsoft_device,
        &flashloader_device,
+       &funsoft_device,
        &google_device,
+       &hp4x_device,
+       &kaufmann_device,
        &libtransistor_device,
-       &vivopay_device,
        &moto_modem_device,
        &motorola_tetra_device,
        &nokia_device,
        &novatel_gps_device,
-       &hp4x_device,
-       &suunto_device,
        &siemens_mpi_device,
+       &suunto_device,
+       &vivopay_device,
+       &zio_device,
        NULL
 };
 
 static const struct usb_device_id id_table[] = {
        CARELINK_IDS(),
-       ZIO_IDS(),
-       FUNSOFT_IDS(),
        FLASHLOADER_IDS(),
+       FUNSOFT_IDS(),
        GOOGLE_IDS(),
+       HP4X_IDS(),
+       KAUFMANN_IDS(),
        LIBTRANSISTOR_IDS(),
-       VIVOPAY_IDS(),
        MOTO_IDS(),
        MOTOROLA_TETRA_IDS(),
        NOKIA_IDS(),
        NOVATEL_IDS(),
-       HP4X_IDS(),
-       SUUNTO_IDS(),
        SIEMENS_IDS(),
+       SUUNTO_IDS(),
+       VIVOPAY_IDS(),
+       ZIO_IDS(),
        { },
 };
 MODULE_DEVICE_TABLE(usb, id_table);
index 5e912dd..115f05a 100644 (file)
@@ -318,7 +318,8 @@ static int alauda_get_media_status(struct us_data *us, unsigned char *data)
        rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
                command, 0xc0, 0, 1, data, 2);
 
-       usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
+       if (rc == USB_STOR_XFER_GOOD)
+               usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
 
        return rc;
 }
@@ -454,9 +455,14 @@ static int alauda_init_media(struct us_data *us)
 static int alauda_check_media(struct us_data *us)
 {
        struct alauda_info *info = (struct alauda_info *) us->extra;
-       unsigned char status[2];
+       unsigned char *status = us->iobuf;
+       int rc;
 
-       alauda_get_media_status(us, status);
+       rc = alauda_get_media_status(us, status);
+       if (rc != USB_STOR_XFER_GOOD) {
+               status[0] = 0xF0;       /* Pretend there's no media */
+               status[1] = 0;
+       }
 
        /* Check for no media or door open */
        if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10)
index 66de880..cdf8261 100644 (file)
@@ -60,6 +60,7 @@ struct dp_altmode {
 
        enum dp_state state;
        bool hpd;
+       bool pending_hpd;
 
        struct mutex lock; /* device lock */
        struct work_struct work;
@@ -144,8 +145,13 @@ static int dp_altmode_status_update(struct dp_altmode *dp)
                dp->state = DP_STATE_EXIT;
        } else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) {
                ret = dp_altmode_configure(dp, con);
-               if (!ret)
+               if (!ret) {
                        dp->state = DP_STATE_CONFIGURE;
+                       if (dp->hpd != hpd) {
+                               dp->hpd = hpd;
+                               dp->pending_hpd = true;
+                       }
+               }
        } else {
                if (dp->hpd != hpd) {
                        drm_connector_oob_hotplug_event(dp->connector_fwnode);
@@ -161,6 +167,16 @@ static int dp_altmode_configured(struct dp_altmode *dp)
 {
        sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration");
        sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment");
+       /*
+        * If the DFP_D/UFP_D sends a change in HPD when first notifying the
+        * DisplayPort driver that it is connected, then we wait until
+        * configuration is complete to signal HPD.
+        */
+       if (dp->pending_hpd) {
+               drm_connector_oob_hotplug_event(dp->connector_fwnode);
+               sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd");
+               dp->pending_hpd = false;
+       }
 
        return dp_altmode_notify(dp);
 }
index faa184a..9c1dbf3 100644 (file)
@@ -1277,8 +1277,7 @@ static ssize_t select_usb_power_delivery_show(struct device *dev,
 {
        struct typec_port *port = to_typec_port(dev);
        struct usb_power_delivery **pds;
-       struct usb_power_delivery *pd;
-       int ret = 0;
+       int i, ret = 0;
 
        if (!port->ops || !port->ops->pd_get)
                return -EOPNOTSUPP;
@@ -1287,11 +1286,11 @@ static ssize_t select_usb_power_delivery_show(struct device *dev,
        if (!pds)
                return 0;
 
-       for (pd = pds[0]; pd; pd++) {
-               if (pd == port->pd)
-                       ret += sysfs_emit(buf + ret, "[%s] ", dev_name(&pd->dev));
+       for (i = 0; pds[i]; i++) {
+               if (pds[i] == port->pd)
+                       ret += sysfs_emit_at(buf, ret, "[%s] ", dev_name(&pds[i]->dev));
                else
-                       ret += sysfs_emit(buf + ret, "%s ", dev_name(&pd->dev));
+                       ret += sysfs_emit_at(buf, ret, "%s ", dev_name(&pds[i]->dev));
        }
 
        buf[ret - 1] = '\n';
@@ -2288,6 +2287,8 @@ struct typec_port *typec_register_port(struct device *parent,
                return ERR_PTR(ret);
        }
 
+       port->pd = cap->pd;
+
        ret = device_add(&port->dev);
        if (ret) {
                dev_err(parent, "failed to register port (%d)\n", ret);
@@ -2295,7 +2296,7 @@ struct typec_port *typec_register_port(struct device *parent,
                return ERR_PTR(ret);
        }
 
-       ret = typec_port_set_usb_power_delivery(port, cap->pd);
+       ret = usb_power_delivery_link_device(port->pd, &port->dev);
        if (ret) {
                dev_err(&port->dev, "failed to link pd\n");
                device_unregister(&port->dev);
index 784b9d8..65da611 100644 (file)
@@ -29,6 +29,7 @@ config TYPEC_MUX_INTEL_PMC
        tristate "Intel PMC mux control"
        depends on ACPI
        depends on INTEL_SCU_IPC
+       select USB_COMMON
        select USB_ROLE_SWITCH
        help
          Driver for USB muxes controlled by Intel PMC FW. Intel PMC FW can
index 80e580d..4d1122d 100644 (file)
@@ -463,16 +463,18 @@ static int nb7vpq904m_probe(struct i2c_client *client)
 
        ret = nb7vpq904m_register_bridge(nb7);
        if (ret)
-               return ret;
+               goto err_disable_gpio;
 
        sw_desc.drvdata = nb7;
        sw_desc.fwnode = dev->fwnode;
        sw_desc.set = nb7vpq904m_sw_set;
 
        nb7->sw = typec_switch_register(dev, &sw_desc);
-       if (IS_ERR(nb7->sw))
-               return dev_err_probe(dev, PTR_ERR(nb7->sw),
-                                    "Error registering typec switch\n");
+       if (IS_ERR(nb7->sw)) {
+               ret = dev_err_probe(dev, PTR_ERR(nb7->sw),
+                                   "Error registering typec switch\n");
+               goto err_disable_gpio;
+       }
 
        retimer_desc.drvdata = nb7;
        retimer_desc.fwnode = dev->fwnode;
@@ -480,12 +482,21 @@ static int nb7vpq904m_probe(struct i2c_client *client)
 
        nb7->retimer = typec_retimer_register(dev, &retimer_desc);
        if (IS_ERR(nb7->retimer)) {
-               typec_switch_unregister(nb7->sw);
-               return dev_err_probe(dev, PTR_ERR(nb7->retimer),
-                                    "Error registering typec retimer\n");
+               ret = dev_err_probe(dev, PTR_ERR(nb7->retimer),
+                                   "Error registering typec retimer\n");
+               goto err_switch_unregister;
        }
 
        return 0;
+
+err_switch_unregister:
+       typec_switch_unregister(nb7->sw);
+
+err_disable_gpio:
+       gpiod_set_value(nb7->enable_gpio, 0);
+       regulator_disable(nb7->vcc_supply);
+
+       return ret;
 }
 
 static void nb7vpq904m_remove(struct i2c_client *client)
index a905160..9b467a3 100644 (file)
@@ -209,8 +209,8 @@ static int qcom_pmic_typec_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, tcpm);
 
        tcpm->tcpc.fwnode = device_get_named_child_node(tcpm->dev, "connector");
-       if (IS_ERR(tcpm->tcpc.fwnode))
-               return PTR_ERR(tcpm->tcpc.fwnode);
+       if (!tcpm->tcpc.fwnode)
+               return -EINVAL;
 
        tcpm->tcpm_port = tcpm_register_port(tcpm->dev, &tcpm->tcpc);
        if (IS_ERR(tcpm->tcpm_port)) {
index 829d75e..cc1d839 100644 (file)
@@ -5349,6 +5349,10 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
                /* Do nothing, vbus drop expected */
                break;
 
+       case SNK_HARD_RESET_WAIT_VBUS:
+               /* Do nothing, it's OK to receive vbus off events */
+               break;
+
        default:
                if (port->pwr_role == TYPEC_SINK && port->attached)
                        tcpm_set_state(port, SNK_UNATTACHED, tcpm_wait_for_discharge(port));
@@ -5395,6 +5399,9 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
        case SNK_DEBOUNCED:
                /* Do nothing, still waiting for VSAFE5V for connect */
                break;
+       case SNK_HARD_RESET_WAIT_VBUS:
+               /* Do nothing, it's OK to receive vbus off events */
+               break;
        default:
                if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
                        tcpm_set_state(port, SNK_UNATTACHED, 0);
index 9b6d6b1..f690131 100644 (file)
@@ -785,6 +785,8 @@ static void ucsi_unregister_partner(struct ucsi_connector *con)
        if (!con->partner)
                return;
 
+       typec_set_mode(con->port, TYPEC_STATE_SAFE);
+
        ucsi_unregister_partner_pdos(con);
        ucsi_unregister_altmodes(con, UCSI_RECIPIENT_SOP);
        typec_unregister_partner(con->partner);
@@ -825,8 +827,6 @@ static void ucsi_partner_change(struct ucsi_connector *con)
                                        UCSI_CONSTAT_PARTNER_FLAG_USB)
                                typec_set_mode(con->port, TYPEC_STATE_USB);
                }
-       } else {
-               typec_set_mode(con->port, TYPEC_STATE_SAFE);
        }
 
        /* Only notify USB controller if partner supports USB data */
index 25fc412..b53420e 100644 (file)
@@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
        struct list_head head;
        unsigned long num_directs;
        unsigned long num_klms;
+       /* state of dvq mr */
        bool initialized;
 
        /* serialize mkey creation and destruction */
@@ -121,6 +122,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
                        unsigned int asid);
 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
 
 #define mlx5_vdpa_warn(__dev, format, ...)                                                         \
        dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__,     \
index 03e5432..5a1971f 100644 (file)
@@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
        }
 }
 
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+               return;
+
+       prune_iotlb(mvdev);
+}
+
+static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
 {
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
 
-       mutex_lock(&mr->mkey_mtx);
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
+               return;
+
        if (!mr->initialized)
-               goto out;
+               return;
 
-       prune_iotlb(mvdev);
        if (mr->user_mr)
                destroy_user_mr(mvdev, mr);
        else
                destroy_dma_mr(mvdev, mr);
 
        mr->initialized = false;
-out:
+}
+
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+       struct mlx5_vdpa_mr *mr = &mvdev->mr;
+
+       mutex_lock(&mr->mkey_mtx);
+
+       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
+       _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
+
        mutex_unlock(&mr->mkey_mtx);
 }
 
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
-                               struct vhost_iotlb *iotlb, unsigned int asid)
+void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+{
+       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
+       mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
+}
+
+static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
+                                   struct vhost_iotlb *iotlb,
+                                   unsigned int asid)
+{
+       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+               return 0;
+
+       return dup_iotlb(mvdev, iotlb);
+}
+
+static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
+                                   struct vhost_iotlb *iotlb,
+                                   unsigned int asid)
 {
        struct mlx5_vdpa_mr *mr = &mvdev->mr;
        int err;
 
-       if (mr->initialized)
+       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
                return 0;
 
-       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
-               if (iotlb)
-                       err = create_user_mr(mvdev, iotlb);
-               else
-                       err = create_dma_mr(mvdev, mr);
+       if (mr->initialized)
+               return 0;
 
-               if (err)
-                       return err;
-       }
+       if (iotlb)
+               err = create_user_mr(mvdev, iotlb);
+       else
+               err = create_dma_mr(mvdev, mr);
 
-       if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
-               err = dup_iotlb(mvdev, iotlb);
-               if (err)
-                       goto out_err;
-       }
+       if (err)
+               return err;
 
        mr->initialized = true;
+
+       return 0;
+}
+
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+                               struct vhost_iotlb *iotlb, unsigned int asid)
+{
+       int err;
+
+       err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
+       if (err)
+               return err;
+
+       err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
+       if (err)
+               goto out_err;
+
        return 0;
 
 out_err:
-       if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
-               if (iotlb)
-                       destroy_user_mr(mvdev, mr);
-               else
-                       destroy_dma_mr(mvdev, mr);
-       }
+       _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
 
        return err;
 }
index 9138ef2..37be945 100644 (file)
@@ -2517,7 +2517,15 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
        else
                ndev->rqt_size = 1;
 
-       ndev->cur_num_vqs = 2 * ndev->rqt_size;
+       /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
+        * 5.1.6.5.5 "Device operation in multiqueue mode":
+        *
+        * Multiqueue is disabled by default.
+        * The driver enables multiqueue by sending a command using class
+        * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
+        * operation, as follows: ...
+        */
+       ndev->cur_num_vqs = 2;
 
        update_cvq_info(mvdev);
        return err;
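
For context on the spec comment above: a driver leaves the single-queue default by sending a VIRTIO_NET_CTRL_MQ command on the control virtqueue. A minimal sketch, assuming a hypothetical send_ctrl_cmd() submission helper; the struct and constants are from include/uapi/linux/virtio_net.h:

#include <linux/virtio_config.h>
#include <linux/virtio_net.h>

/* Sketch only: how a guest driver asks for num_pairs queue pairs.
 * send_ctrl_cmd() is a hypothetical stand-in for the driver's
 * control-virtqueue submission path; vdev supplies the endianness.
 */
static int demo_enable_mq(struct virtio_device *vdev, u16 num_pairs)
{
	struct virtio_net_ctrl_mq mq = {
		.virtqueue_pairs = cpu_to_virtio16(vdev, num_pairs),
	};

	return send_ctrl_cmd(vdev, VIRTIO_NET_CTRL_MQ,
			     VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq, sizeof(mq));
}
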
@@ -2636,7 +2644,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
                goto err_mr;
 
        teardown_driver(ndev);
-       mlx5_vdpa_destroy_mr(mvdev);
+       mlx5_vdpa_destroy_mr_asid(mvdev, asid);
        err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
        if (err)
                goto err_mr;
@@ -2652,7 +2660,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
        return 0;
 
 err_setup:
-       mlx5_vdpa_destroy_mr(mvdev);
+       mlx5_vdpa_destroy_mr_asid(mvdev, asid);
 err_mr:
        return err;
 }
@@ -3548,17 +3556,6 @@ static void mlx5v_remove(struct auxiliary_device *adev)
        kfree(mgtdev);
 }
 
-static void mlx5v_shutdown(struct auxiliary_device *auxdev)
-{
-       struct mlx5_vdpa_mgmtdev *mgtdev;
-       struct mlx5_vdpa_net *ndev;
-
-       mgtdev = auxiliary_get_drvdata(auxdev);
-       ndev = mgtdev->ndev;
-
-       free_irqs(ndev);
-}
-
 static const struct auxiliary_device_id mlx5v_id_table[] = {
        { .name = MLX5_ADEV_NAME ".vnet", },
        {},
@@ -3570,7 +3567,6 @@ static struct auxiliary_driver mlx5v_driver = {
        .name = "vnet",
        .probe = mlx5v_probe,
        .remove = mlx5v_remove,
-       .shutdown = mlx5v_shutdown,
        .id_table = mlx5v_id_table,
 };
 
index 2e22418..c2d314d 100644 (file)
@@ -5,6 +5,5 @@ obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o
 
 pds_vdpa-y := aux_drv.o \
              cmds.o \
+             debugfs.o \
              vdpa_dev.o
-
-pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o
index 21a0dc0..9b04aad 100644 (file)
@@ -176,6 +176,7 @@ static int identity_show(struct seq_file *seq, void *v)
 {
        struct pds_vdpa_aux *vdpa_aux = seq->private;
        struct vdpa_mgmt_dev *mgmt;
+       u64 hw_features;
 
        seq_printf(seq, "aux_dev:            %s\n",
                   dev_name(&vdpa_aux->padev->aux_dev.dev));
@@ -183,8 +184,9 @@ static int identity_show(struct seq_file *seq, void *v)
        mgmt = &vdpa_aux->vdpa_mdev;
        seq_printf(seq, "max_vqs:            %d\n", mgmt->max_supported_vqs);
        seq_printf(seq, "config_attr_mask:   %#llx\n", mgmt->config_attr_mask);
-       seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features);
-       print_feature_bits_all(seq, mgmt->supported_features);
+       hw_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+       seq_printf(seq, "hw_features:        %#llx\n", hw_features);
+       print_feature_bits_all(seq, hw_features);
 
        return 0;
 }
@@ -200,7 +202,6 @@ static int config_show(struct seq_file *seq, void *v)
 {
        struct pds_vdpa_device *pdsv = seq->private;
        struct virtio_net_config vc;
-       u64 driver_features;
        u8 status;
 
        memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device,
@@ -223,12 +224,8 @@ static int config_show(struct seq_file *seq, void *v)
        status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
        seq_printf(seq, "dev_status:           %#x\n", status);
        print_status_bits(seq, status);
-
-       seq_printf(seq, "req_features:         %#llx\n", pdsv->req_features);
-       print_feature_bits_all(seq, pdsv->req_features);
-       driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
-       seq_printf(seq, "driver_features:      %#llx\n", driver_features);
-       print_feature_bits_all(seq, driver_features);
+       seq_printf(seq, "negotiated_features:  %#llx\n", pdsv->negotiated_features);
+       print_feature_bits_all(seq, pdsv->negotiated_features);
        seq_printf(seq, "vdpa_index:           %d\n", pdsv->vdpa_index);
        seq_printf(seq, "num_vqs:              %d\n", pdsv->num_vqs);
 
index 5071a4d..52b2449 100644 (file)
@@ -126,11 +126,9 @@ static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid)
 static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready)
 {
        struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
-       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
        struct device *dev = &pdsv->vdpa_dev.dev;
        u64 driver_features;
        u16 invert_idx = 0;
-       int irq;
        int err;
 
        dev_dbg(dev, "%s: qid %d ready %d => %d\n",
@@ -143,19 +141,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
                invert_idx = PDS_VDPA_PACKED_INVERT_IDX;
 
        if (ready) {
-               irq = pci_irq_vector(pdev, qid);
-               snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
-                        "vdpa-%s-%d", dev_name(dev), qid);
-
-               err = request_irq(irq, pds_vdpa_isr, 0,
-                                 pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]);
-               if (err) {
-                       dev_err(dev, "%s: no irq for qid %d: %pe\n",
-                               __func__, qid, ERR_PTR(err));
-                       return;
-               }
-               pdsv->vqs[qid].irq = irq;
-
                /* Pass vq setup info to DSC using adminq to gather up and
+                * send all info at once so FW can do its full setup in
                 * one easy operation
@@ -164,7 +149,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
                if (err) {
                        dev_err(dev, "Failed to init vq %d: %pe\n",
                                qid, ERR_PTR(err));
-                       pds_vdpa_release_irq(pdsv, qid);
                        ready = false;
                }
        } else {
@@ -172,7 +156,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
                if (err)
                        dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
                                __func__, qid, ERR_PTR(err));
-               pds_vdpa_release_irq(pdsv, qid);
        }
 
        pdsv->vqs[qid].ready = ready;
@@ -318,6 +301,7 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
        struct device *dev = &pdsv->vdpa_dev.dev;
        u64 driver_features;
        u64 nego_features;
+       u64 hw_features;
        u64 missing;
 
        if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
@@ -325,21 +309,26 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
                return -EOPNOTSUPP;
        }
 
-       pdsv->req_features = features;
-
        /* Check for valid feature bits */
-       nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
-       missing = pdsv->req_features & ~nego_features;
+       nego_features = features & pdsv->supported_features;
+       missing = features & ~nego_features;
        if (missing) {
                dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n",
-                       pdsv->req_features, missing);
+                       features, missing);
                return -EOPNOTSUPP;
        }
 
+       pdsv->negotiated_features = nego_features;
+
        driver_features = pds_vdpa_get_driver_features(vdpa_dev);
        dev_dbg(dev, "%s: %#llx => %#llx\n",
                __func__, driver_features, nego_features);
 
+       /* if we're faking the F_MAC, strip it before writing to device */
+       hw_features = le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
+       if (!(hw_features & BIT_ULL(VIRTIO_NET_F_MAC)))
+               nego_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
+
        if (driver_features == nego_features)
                return 0;
 
@@ -352,7 +341,7 @@ static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
 {
        struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
 
-       return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
+       return pdsv->negotiated_features;
 }
 
 static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
@@ -389,6 +378,72 @@ static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev)
        return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
 }
 
+static int pds_vdpa_request_irqs(struct pds_vdpa_device *pdsv)
+{
+       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+       struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux;
+       struct device *dev = &pdsv->vdpa_dev.dev;
+       int max_vq, nintrs, qid, err;
+
+       max_vq = vdpa_aux->vdpa_mdev.max_supported_vqs;
+
+       nintrs = pci_alloc_irq_vectors(pdev, max_vq, max_vq, PCI_IRQ_MSIX);
+       if (nintrs < 0) {
+               dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
+                       max_vq, ERR_PTR(nintrs));
+               return nintrs;
+       }
+
+       for (qid = 0; qid < pdsv->num_vqs; ++qid) {
+               int irq = pci_irq_vector(pdev, qid);
+
+               snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
+                        "vdpa-%s-%d", dev_name(dev), qid);
+
+               err = request_irq(irq, pds_vdpa_isr, 0,
+                                 pdsv->vqs[qid].irq_name,
+                                 &pdsv->vqs[qid]);
+               if (err) {
+                       dev_err(dev, "%s: no irq for qid %d: %pe\n",
+                               __func__, qid, ERR_PTR(err));
+                       goto err_release;
+               }
+
+               pdsv->vqs[qid].irq = irq;
+       }
+
+       vdpa_aux->nintrs = nintrs;
+
+       return 0;
+
+err_release:
+       while (qid--)
+               pds_vdpa_release_irq(pdsv, qid);
+
+       pci_free_irq_vectors(pdev);
+
+       vdpa_aux->nintrs = 0;
+
+       return err;
+}
+
+static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv)
+{
+       struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+       struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux;
+       int qid;
+
+       if (!vdpa_aux->nintrs)
+               return;
+
+       for (qid = 0; qid < pdsv->num_vqs; qid++)
+               pds_vdpa_release_irq(pdsv, qid);
+
+       pci_free_irq_vectors(pdev);
+
+       vdpa_aux->nintrs = 0;
+}
+
 static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
 {
        struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
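
Note on the allocation above: max_vq is passed as both min_vecs and max_vecs, so the request is all-or-nothing. A hedged sketch of that contract, with demo names rather than the driver's:

/* Sketch: with min == max, pci_alloc_irq_vectors() either grants
 * exactly `want` MSI-X vectors or fails with a negative errno.
 */
static int demo_alloc_vectors(struct pci_dev *pdev, unsigned int want)
{
	int nvecs = pci_alloc_irq_vectors(pdev, want, want, PCI_IRQ_MSIX);

	if (nvecs < 0)
		return nvecs;	/* fewer than `want` were available */

	/* individual lines are then looked up per queue index */
	return pci_irq_vector(pdev, 0);
}
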
@@ -399,6 +454,11 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
        old_status = pds_vdpa_get_status(vdpa_dev);
        dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status);
 
+       if (status & ~old_status & VIRTIO_CONFIG_S_DRIVER_OK) {
+               if (pds_vdpa_request_irqs(pdsv))
+                       status = old_status | VIRTIO_CONFIG_S_FAILED;
+       }
+
        pds_vdpa_cmd_set_status(pdsv, status);
 
        /* Note: still working with FW on the need for this reset cmd */
@@ -409,6 +469,8 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
                        pdsv->vqs[i].avail_idx = 0;
                        pdsv->vqs[i].used_idx = 0;
                }
+
+               pds_vdpa_cmd_set_mac(pdsv, pdsv->mac);
        }
 
        if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) {
@@ -418,6 +480,20 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
                                                        i, &pdsv->vqs[i].notify_pa);
                }
        }
+
+       if (old_status & ~status & VIRTIO_CONFIG_S_DRIVER_OK)
+               pds_vdpa_release_irqs(pdsv);
+}
+
+static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid,
+                                   void __iomem *notify)
+{
+       memset(&pdsv->vqs[qid], 0, sizeof(pdsv->vqs[0]));
+       pdsv->vqs[qid].qid = qid;
+       pdsv->vqs[qid].pdsv = pdsv;
+       pdsv->vqs[qid].ready = false;
+       pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR;
+       pdsv->vqs[qid].notify = notify;
 }
 
 static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
@@ -441,14 +517,17 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
                        if (err)
                                dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
                                        __func__, i, ERR_PTR(err));
-                       pds_vdpa_release_irq(pdsv, i);
-                       memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0]));
-                       pdsv->vqs[i].ready = false;
                }
        }
 
        pds_vdpa_set_status(vdpa_dev, 0);
 
+       if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+               /* Reset the vq info */
+               for (i = 0; i < pdsv->num_vqs && !err; i++)
+                       pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify);
+       }
+
        return 0;
 }
 
@@ -532,7 +611,6 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        struct device *dma_dev;
        struct pci_dev *pdev;
        struct device *dev;
-       u8 mac[ETH_ALEN];
        int err;
        int i;
 
@@ -563,7 +641,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
 
        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
                u64 unsupp_features =
-                       add_config->device_features & ~mgmt->supported_features;
+                       add_config->device_features & ~pdsv->supported_features;
 
                if (unsupp_features) {
                        dev_err(dev, "Unsupported features: %#llx\n", unsupp_features);
@@ -614,29 +692,30 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
        }
 
        /* Set a mac, either from the user config if provided
-        * or set a random mac if default is 00:..:00
+        * or use the device's mac if not 00:..:00
+        * or set a random mac
         */
        if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
-               ether_addr_copy(mac, add_config->net.mac);
-               pds_vdpa_cmd_set_mac(pdsv, mac);
+               ether_addr_copy(pdsv->mac, add_config->net.mac);
        } else {
                struct virtio_net_config __iomem *vc;
 
                vc = pdsv->vdpa_aux->vd_mdev.device;
-               memcpy_fromio(mac, vc->mac, sizeof(mac));
-               if (is_zero_ether_addr(mac)) {
-                       eth_random_addr(mac);
-                       dev_info(dev, "setting random mac %pM\n", mac);
-                       pds_vdpa_cmd_set_mac(pdsv, mac);
+               memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac));
+               if (is_zero_ether_addr(pdsv->mac) &&
+                   (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_MAC))) {
+                       eth_random_addr(pdsv->mac);
+                       dev_info(dev, "setting random mac %pM\n", pdsv->mac);
                }
        }
+       pds_vdpa_cmd_set_mac(pdsv, pdsv->mac);
 
        for (i = 0; i < pdsv->num_vqs; i++) {
-               pdsv->vqs[i].qid = i;
-               pdsv->vqs[i].pdsv = pdsv;
-               pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR;
-               pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
-                                                             i, &pdsv->vqs[i].notify_pa);
+               void __iomem *notify;
+
+               notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
+                                                i, &pdsv->vqs[i].notify_pa);
+               pds_vdpa_init_vqs_entry(pdsv, i, notify);
        }
 
        pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev;
@@ -746,24 +825,19 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux)
 
        max_vqs = min_t(u16, dev_intrs, max_vqs);
        mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs);
-       vdpa_aux->nintrs = mgmt->max_supported_vqs;
+       vdpa_aux->nintrs = 0;
 
        mgmt->ops = &pds_vdpa_mgmt_dev_ops;
        mgmt->id_table = pds_vdpa_id_table;
        mgmt->device = dev;
        mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+
+       /* advertise F_MAC even if the device doesn't */
+       mgmt->supported_features |= BIT_ULL(VIRTIO_NET_F_MAC);
+
        mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
        mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
        mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
 
-       err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs,
-                                   PCI_IRQ_MSIX);
-       if (err < 0) {
-               dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
-                       vdpa_aux->nintrs, ERR_PTR(err));
-               return err;
-       }
-       vdpa_aux->nintrs = err;
-
        return 0;
 }
index a1bc37d..d984ba2 100644 (file)
@@ -35,10 +35,11 @@ struct pds_vdpa_device {
        struct pds_vdpa_aux *vdpa_aux;
 
        struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES];
-       u64 supported_features;         /* specified device features */
-       u64 req_features;               /* features requested by vdpa */
+       u64 supported_features;         /* supported device features */
+       u64 negotiated_features;        /* negotiated features */
        u8 vdpa_index;                  /* rsvd for future subdevice use */
        u8 num_vqs;                     /* num vqs in use */
+       u8 mac[ETH_ALEN];               /* mac selected when the device was added */
        struct vdpa_callback config_cb;
        struct notifier_block nb;
 };
index 965e325..a7612e0 100644 (file)
@@ -1247,44 +1247,41 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
        [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
        [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
        [VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+       [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 },
        /* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
        [VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
+       [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 },
+       [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 },
 };
 
 static const struct genl_ops vdpa_nl_ops[] = {
        {
                .cmd = VDPA_CMD_MGMTDEV_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_mgmtdev_get_doit,
                .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
        },
        {
                .cmd = VDPA_CMD_DEV_NEW,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_add_set_doit,
                .flags = GENL_ADMIN_PERM,
        },
        {
                .cmd = VDPA_CMD_DEV_DEL,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_del_set_doit,
                .flags = GENL_ADMIN_PERM,
        },
        {
                .cmd = VDPA_CMD_DEV_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_get_doit,
                .dumpit = vdpa_nl_cmd_dev_get_dumpit,
        },
        {
                .cmd = VDPA_CMD_DEV_CONFIG_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_config_get_doit,
                .dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
        },
        {
                .cmd = VDPA_CMD_DEV_VSTATS_GET,
-               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = vdpa_nl_cmd_dev_stats_get_doit,
                .flags = GENL_ADMIN_PERM,
        },
index dc38ed2..df78695 100644 (file)
@@ -935,10 +935,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
 {
        struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
 
-       spin_lock_irq(&dev->irq_lock);
+       spin_lock_bh(&dev->irq_lock);
        if (dev->config_cb.callback)
                dev->config_cb.callback(dev->config_cb.private);
-       spin_unlock_irq(&dev->irq_lock);
+       spin_unlock_bh(&dev->irq_lock);
 }
 
 static void vduse_vq_irq_inject(struct work_struct *work)
@@ -946,10 +946,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, inject);
 
-       spin_lock_irq(&vq->irq_lock);
+       spin_lock_bh(&vq->irq_lock);
        if (vq->ready && vq->cb.callback)
                vq->cb.callback(vq->cb.private);
-       spin_unlock_irq(&vq->irq_lock);
+       spin_unlock_bh(&vq->irq_lock);
 }
 
 static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
index c83f7f0..abef061 100644 (file)
@@ -25,6 +25,8 @@
 #include <linux/fs.h>
 #include <linux/vmalloc.h>
 #include <linux/miscdevice.h>
+#include <linux/blk_types.h>
+#include <linux/bio.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi_common.h>
 #include <scsi/scsi_proto.h>
@@ -75,6 +77,9 @@ struct vhost_scsi_cmd {
        u32 tvc_prot_sgl_count;
        /* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */
        u32 tvc_lun;
+       u32 copied_iov:1;
+       const void *saved_iter_addr;
+       struct iov_iter saved_iter;
        /* Pointer to the SGL formatted memory from virtio-scsi */
        struct scatterlist *tvc_sgl;
        struct scatterlist *tvc_prot_sgl;
@@ -328,8 +333,13 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd)
        int i;
 
        if (tv_cmd->tvc_sgl_count) {
-               for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
-                       put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+               for (i = 0; i < tv_cmd->tvc_sgl_count; i++) {
+                       if (tv_cmd->copied_iov)
+                               __free_page(sg_page(&tv_cmd->tvc_sgl[i]));
+                       else
+                               put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+               }
+               kfree(tv_cmd->saved_iter_addr);
        }
        if (tv_cmd->tvc_prot_sgl_count) {
                for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
@@ -504,6 +514,28 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
        mutex_unlock(&vq->mutex);
 }
 
+static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd)
+{
+       struct iov_iter *iter = &cmd->saved_iter;
+       struct scatterlist *sg = cmd->tvc_sgl;
+       struct page *page;
+       size_t len;
+       int i;
+
+       for (i = 0; i < cmd->tvc_sgl_count; i++) {
+               page = sg_page(&sg[i]);
+               len = sg[i].length;
+
+               if (copy_page_to_iter(page, 0, len, iter) != len) {
+                       pr_err("Could not copy %zu bytes while handling misaligned cmd\n",
+                              len);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 /* Fill in status and signal that we are done processing this command
  *
  * This is scheduled in the vhost work queue so we are called with the owner
@@ -527,15 +559,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 
                pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
                        cmd, se_cmd->residual_count, se_cmd->scsi_status);
-
                memset(&v_rsp, 0, sizeof(v_rsp));
-               v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count);
-               /* TODO is status_qualifier field needed? */
-               v_rsp.status = se_cmd->scsi_status;
-               v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
-                                                se_cmd->scsi_sense_length);
-               memcpy(v_rsp.sense, cmd->tvc_sense_buf,
-                      se_cmd->scsi_sense_length);
+
+               if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) {
+                       v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
+               } else {
+                       v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq,
+                                                    se_cmd->residual_count);
+                       /* TODO is status_qualifier field needed? */
+                       v_rsp.status = se_cmd->scsi_status;
+                       v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
+                                                        se_cmd->scsi_sense_length);
+                       memcpy(v_rsp.sense, cmd->tvc_sense_buf,
+                              se_cmd->scsi_sense_length);
+               }
 
                iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov,
                              cmd->tvc_in_iovs, sizeof(v_rsp));
@@ -613,12 +650,12 @@ static int
 vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
                      struct iov_iter *iter,
                      struct scatterlist *sgl,
-                     bool write)
+                     bool is_prot)
 {
        struct page **pages = cmd->tvc_upages;
        struct scatterlist *sg = sgl;
-       ssize_t bytes;
-       size_t offset;
+       ssize_t bytes, mapped_bytes;
+       size_t offset, mapped_offset;
        unsigned int npages = 0;
 
        bytes = iov_iter_get_pages2(iter, pages, LONG_MAX,
@@ -627,13 +664,53 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
        if (bytes <= 0)
                return bytes < 0 ? bytes : -EFAULT;
 
+       mapped_bytes = bytes;
+       mapped_offset = offset;
+
        while (bytes) {
                unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
+               /*
+                * The block layer requires bios/requests to be a multiple of
+                * 512 bytes, but Windows can send us vecs that are misaligned.
+                * This can result in bios and later requests with misaligned
+                * sizes if we have to break up a cmd/scatterlist into multiple
+                * bios.
+                *
+                * We currently only break up a command into multiple bios if
+                * we hit the vec/seg limit, so check if our sgl_count is
+                * greater than the max and if a vec in the cmd has a
+                * misaligned offset/size.
+                */
+               if (!is_prot &&
+                   (offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) &&
+                   cmd->tvc_sgl_count > BIO_MAX_VECS) {
+                       WARN_ONCE(true,
+                                 "vhost-scsi detected misaligned IO. Performance may be degraded.\n");
+                       goto revert_iter_get_pages;
+               }
+
                sg_set_page(sg++, pages[npages++], n, offset);
                bytes -= n;
                offset = 0;
        }
+
        return npages;
+
+revert_iter_get_pages:
+       iov_iter_revert(iter, mapped_bytes);
+
+       npages = 0;
+       while (mapped_bytes) {
+               unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset,
+                                      mapped_bytes);
+
+               put_page(pages[npages++]);
+
+               mapped_bytes -= n;
+               mapped_offset = 0;
+       }
+
+       return -EINVAL;
 }
 
 static int
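
The misalignment test in the hunk above relies on SECTOR_SIZE (512, from include/linux/blk_types.h) being a power of two, so masking with SECTOR_SIZE - 1 yields the remainder modulo 512. A small hedged illustration:

/* Sketch: non-zero low bits mean the offset or length is not a
 * multiple of the 512-byte sector size the block layer expects.
 */
static bool demo_vec_misaligned(size_t offset, size_t len)
{
	return (offset & (SECTOR_SIZE - 1)) || (len & (SECTOR_SIZE - 1));
}
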
@@ -657,25 +734,80 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
 }
 
 static int
-vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
-                     struct iov_iter *iter,
-                     struct scatterlist *sg, int sg_count)
+vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
+                          struct scatterlist *sg, int sg_count)
+{
+       size_t len = iov_iter_count(iter);
+       unsigned int nbytes = 0;
+       struct page *page;
+       int i;
+
+       if (cmd->tvc_data_direction == DMA_FROM_DEVICE) {
+               cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter,
+                                               GFP_KERNEL);
+               if (!cmd->saved_iter_addr)
+                       return -ENOMEM;
+       }
+
+       for (i = 0; i < sg_count; i++) {
+               page = alloc_page(GFP_KERNEL);
+               if (!page) {
+                       i--;
+                       goto err;
+               }
+
+               nbytes = min_t(unsigned int, PAGE_SIZE, len);
+               sg_set_page(&sg[i], page, nbytes, 0);
+
+               if (cmd->tvc_data_direction == DMA_TO_DEVICE &&
+                   copy_page_from_iter(page, 0, nbytes, iter) != nbytes)
+                       goto err;
+
+               len -= nbytes;
+       }
+
+       cmd->copied_iov = 1;
+       return 0;
+
+err:
+       pr_err("Could not read %u bytes while handling misaligned cmd\n",
+              nbytes);
+
+       for (; i >= 0; i--)
+               __free_page(sg_page(&sg[i]));
+       kfree(cmd->saved_iter_addr);
+       return -ENOMEM;
+}
+
+static int
+vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
+                         struct scatterlist *sg, int sg_count, bool is_prot)
 {
        struct scatterlist *p = sg;
+       size_t revert_bytes;
        int ret;
 
        while (iov_iter_count(iter)) {
-               ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write);
+               ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot);
                if (ret < 0) {
+                       revert_bytes = 0;
+
                        while (p < sg) {
-                               struct page *page = sg_page(p++);
-                               if (page)
+                               struct page *page = sg_page(p);
+
+                               if (page) {
                                        put_page(page);
+                                       revert_bytes += p->length;
+                               }
+                               p++;
                        }
+
+                       iov_iter_revert(iter, revert_bytes);
                        return ret;
                }
                sg += ret;
        }
+
        return 0;
 }
 
@@ -685,7 +817,6 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
                 size_t data_bytes, struct iov_iter *data_iter)
 {
        int sgl_count, ret;
-       bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE);
 
        if (prot_bytes) {
                sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes,
@@ -698,9 +829,9 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
                pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
                         cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count);
 
-               ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter,
-                                           cmd->tvc_prot_sgl,
-                                           cmd->tvc_prot_sgl_count);
+               ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter,
+                                               cmd->tvc_prot_sgl,
+                                               cmd->tvc_prot_sgl_count, true);
                if (ret < 0) {
                        cmd->tvc_prot_sgl_count = 0;
                        return ret;
@@ -716,8 +847,14 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
        pr_debug("%s data_sg %p data_sgl_count %u\n", __func__,
                  cmd->tvc_sgl, cmd->tvc_sgl_count);
 
-       ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter,
-                                   cmd->tvc_sgl, cmd->tvc_sgl_count);
+       ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
+                                       cmd->tvc_sgl_count, false);
+       if (ret == -EINVAL) {
+               sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count);
+               ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
+                                                cmd->tvc_sgl_count);
+       }
+
        if (ret < 0) {
                cmd->tvc_sgl_count = 0;
                return ret;
index d11cfd2..992a4fa 100644 (file)
@@ -156,7 +156,7 @@ static bool sticon_scroll(struct vc_data *conp, unsigned int t,
     return false;
 }
 
-static int sticon_set_def_font(int unit, struct console_font *op)
+static void sticon_set_def_font(int unit)
 {
        if (font_data[unit] != STI_DEF_FONT) {
                if (--FNTREFCOUNT(font_data[unit]) == 0) {
@@ -165,8 +165,6 @@ static int sticon_set_def_font(int unit, struct console_font *op)
                }
                font_data[unit] = STI_DEF_FONT;
        }
-
-       return 0;
 }
 
 static int sticon_set_font(struct vc_data *vc, struct console_font *op,
@@ -246,7 +244,7 @@ static int sticon_set_font(struct vc_data *vc, struct console_font *op,
                  vc->vc_video_erase_char, font_data[vc->vc_num]);
 
        /* delete old font in case it is a user font */
-       sticon_set_def_font(unit, NULL);
+       sticon_set_def_font(unit);
 
        FNTREFCOUNT(cooked_font)++;
        font_data[unit] = cooked_font;
@@ -264,7 +262,9 @@ static int sticon_set_font(struct vc_data *vc, struct console_font *op,
 
 static int sticon_font_default(struct vc_data *vc, struct console_font *op, char *name)
 {
-       return sticon_set_def_font(vc->vc_num, op);
+       sticon_set_def_font(vc->vc_num);
+
+       return 0;
 }
 
 static int sticon_font_set(struct vc_data *vc, struct console_font *font,
@@ -297,7 +297,7 @@ static void sticon_deinit(struct vc_data *c)
 
     /* free memory used by user font */
     for (i = 0; i < MAX_NR_CONSOLES; i++)
-       sticon_set_def_font(i, NULL);
+       sticon_set_def_font(i);
 }
 
 static void sticon_clear(struct vc_data *conp, int sy, int sx, int height,
index e25ba52..7ad047b 100644 (file)
@@ -65,16 +65,8 @@ static struct vgastate vgastate;
  *  Interface used by the world
  */
 
-static const char *vgacon_startup(void);
-static void vgacon_init(struct vc_data *c, int init);
-static void vgacon_deinit(struct vc_data *c);
-static void vgacon_cursor(struct vc_data *c, int mode);
-static int vgacon_switch(struct vc_data *c);
-static int vgacon_blank(struct vc_data *c, int blank, int mode_switch);
-static void vgacon_scrolldelta(struct vc_data *c, int lines);
 static int vgacon_set_origin(struct vc_data *c);
-static void vgacon_save_screen(struct vc_data *c);
-static void vgacon_invert_region(struct vc_data *c, u16 * p, int count);
+
 static struct uni_pagedict *vgacon_uni_pagedir;
 static int vgacon_refcount;
 
@@ -142,12 +134,6 @@ static inline void vga_set_mem_top(struct vc_data *c)
        write_vga(12, (c->vc_visible_origin - vga_vram_base) / 2);
 }
 
-static void vgacon_restore_screen(struct vc_data *c)
-{
-       if (c->vc_origin != c->vc_visible_origin)
-               vgacon_scrolldelta(c, 0);
-}
-
 static void vgacon_scrolldelta(struct vc_data *c, int lines)
 {
        vc_scrolldelta_helper(c, lines, vga_rolled_over, (void *)vga_vram_base,
@@ -155,6 +141,12 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines)
        vga_set_mem_top(c);
 }
 
+static void vgacon_restore_screen(struct vc_data *c)
+{
+       if (c->vc_origin != c->vc_visible_origin)
+               vgacon_scrolldelta(c, 0);
+}
+
 static const char *vgacon_startup(void)
 {
        const char *display_desc = NULL;
@@ -445,7 +437,7 @@ static void vgacon_invert_region(struct vc_data *c, u16 * p, int count)
        }
 }
 
-static void vgacon_set_cursor_size(int xpos, int from, int to)
+static void vgacon_set_cursor_size(int from, int to)
 {
        unsigned long flags;
        int curs, cure;
@@ -478,18 +470,22 @@ static void vgacon_set_cursor_size(int xpos, int from, int to)
 
 static void vgacon_cursor(struct vc_data *c, int mode)
 {
+       unsigned int c_height;
+
        if (c->vc_mode != KD_TEXT)
                return;
 
        vgacon_restore_screen(c);
 
+       c_height = c->vc_cell_height;
+
        switch (mode) {
        case CM_ERASE:
                write_vga(14, (c->vc_pos - vga_vram_base) / 2);
                if (vga_video_type >= VIDEO_TYPE_VGAC)
-                       vgacon_set_cursor_size(c->state.x, 31, 30);
+                       vgacon_set_cursor_size(31, 30);
                else
-                       vgacon_set_cursor_size(c->state.x, 31, 31);
+                       vgacon_set_cursor_size(31, 31);
                break;
 
        case CM_MOVE:
@@ -497,51 +493,38 @@ static void vgacon_cursor(struct vc_data *c, int mode)
                write_vga(14, (c->vc_pos - vga_vram_base) / 2);
                switch (CUR_SIZE(c->vc_cursor_type)) {
                case CUR_UNDERLINE:
-                       vgacon_set_cursor_size(c->state.x,
-                                              c->vc_cell_height -
-                                              (c->vc_cell_height <
-                                               10 ? 2 : 3),
-                                              c->vc_cell_height -
-                                              (c->vc_cell_height <
-                                               10 ? 1 : 2));
+                       vgacon_set_cursor_size(c_height -
+                                              (c_height < 10 ? 2 : 3),
+                                              c_height -
+                                              (c_height < 10 ? 1 : 2));
                        break;
                case CUR_TWO_THIRDS:
-                       vgacon_set_cursor_size(c->state.x,
-                                              c->vc_cell_height / 3,
-                                              c->vc_cell_height -
-                                              (c->vc_cell_height <
-                                               10 ? 1 : 2));
+                       vgacon_set_cursor_size(c_height / 3, c_height -
+                                              (c_height < 10 ? 1 : 2));
                        break;
                case CUR_LOWER_THIRD:
-                       vgacon_set_cursor_size(c->state.x,
-                                              (c->vc_cell_height * 2) / 3,
-                                              c->vc_cell_height -
-                                              (c->vc_cell_height <
-                                               10 ? 1 : 2));
+                       vgacon_set_cursor_size(c_height * 2 / 3, c_height -
+                                              (c_height < 10 ? 1 : 2));
                        break;
                case CUR_LOWER_HALF:
-                       vgacon_set_cursor_size(c->state.x,
-                                              c->vc_cell_height / 2,
-                                              c->vc_cell_height -
-                                              (c->vc_cell_height <
-                                               10 ? 1 : 2));
+                       vgacon_set_cursor_size(c_height / 2, c_height -
+                                              (c_height < 10 ? 1 : 2));
                        break;
                case CUR_NONE:
                        if (vga_video_type >= VIDEO_TYPE_VGAC)
-                               vgacon_set_cursor_size(c->state.x, 31, 30);
+                               vgacon_set_cursor_size(31, 30);
                        else
-                               vgacon_set_cursor_size(c->state.x, 31, 31);
+                               vgacon_set_cursor_size(31, 31);
                        break;
                default:
-                       vgacon_set_cursor_size(c->state.x, 1,
-                                              c->vc_cell_height);
+                       vgacon_set_cursor_size(1, c_height);
                        break;
                }
                break;
        }
 }
 
-static int vgacon_doresize(struct vc_data *c,
+static void vgacon_doresize(struct vc_data *c,
                unsigned int width, unsigned int height)
 {
        unsigned long flags;
@@ -600,7 +583,6 @@ static int vgacon_doresize(struct vc_data *c,
        }
 
        raw_spin_unlock_irqrestore(&vga_lock, flags);
-       return 0;
 }
 
 static int vgacon_switch(struct vc_data *c)
index d88265d..f216b2c 100644 (file)
@@ -687,7 +687,7 @@ struct fb_var_cursorinfo {
        __u16 height;
        __u16 xspot;
        __u16 yspot;
-       __u8 data[1];                   /* field with [height][width]        */
+       DECLARE_FLEX_ARRAY(__u8, data); /* field with [height][width]        */
 };
 
 struct fb_cursorstate {
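
DECLARE_FLEX_ARRAY() (linux/stddef.h) wraps a flexible-array member so it can also be the sole member of a struct or sit inside a union; unlike the one-element-array idiom it replaces here, sizeof the containing struct no longer counts a fake first element. A hedged sketch with demo types:

#include <linux/stddef.h>

struct demo_old {
	__u16 len;
	__u8 data[1];			/* sizeof includes data[0]    */
};

struct demo_new {
	__u16 len;
	DECLARE_FLEX_ARRAY(__u8, data);	/* trailing storage, size 0   */
};
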
index 987c5f5..f245da1 100644 (file)
@@ -1308,7 +1308,7 @@ static struct platform_driver atmel_lcdfb_driver = {
        .resume         = atmel_lcdfb_resume,
        .driver         = {
                .name   = "atmel_lcdfb",
-               .of_match_table = of_match_ptr(atmel_lcdfb_dt_ids),
+               .of_match_table = atmel_lcdfb_dt_ids,
        },
 };
 
index 5c232eb..c137d6a 100644 (file)
@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev)
 
        /* Now hook interrupt too */
        irq = platform_get_irq(dev, 0);
+       if (irq < 0)
+               return irq;
+
        ret = request_irq(irq, au1200fb_handle_irq,
                          IRQF_SHARED, "lcd", (void *)dev);
        if (ret) {
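
This fix, like the goldfish one further down, applies the canonical platform_get_irq() pattern: on failure the call returns a negative errno (and on current kernels it never returns 0), so the value is validated before reaching request_irq(). A hedged sketch; demo_handler is a hypothetical handler:

static int demo_probe_irq(struct platform_device *pdev)
{
	int irq = platform_get_irq(pdev, 0);

	if (irq < 0)
		return irq;	/* propagates e.g. -EPROBE_DEFER */

	/* demo_handler: hypothetical irqreturn_t (*)(int, void *) */
	return request_irq(irq, demo_handler, IRQF_SHARED, "demo", pdev);
}
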
index 025d663..39f438d 100644 (file)
@@ -17,7 +17,8 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/fbio.h>
index 832a82f..90fdc9d 100644 (file)
@@ -17,7 +17,8 @@
 #include <linux/fb.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/fbio.h>
index 6335cd3..98c60f7 100644 (file)
@@ -17,7 +17,8 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/fbio.h>
index 6884572..6427b85 100644 (file)
@@ -17,7 +17,8 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/fbio.h>
index c6c9d04..887fad4 100644 (file)
@@ -1612,8 +1612,7 @@ static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
        }
 }
 
-static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
-                        int line, int count, int offset)
+static void fbcon_redraw(struct vc_data *vc, int line, int count, int offset)
 {
        unsigned short *d = (unsigned short *)
            (vc->vc_origin + vc->vc_size_row * line);
@@ -1827,7 +1826,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 
                case SCROLL_REDRAW:
                      redraw_up:
-                       fbcon_redraw(vc, p, t, b - t - count,
+                       fbcon_redraw(vc, t, b - t - count,
                                     count * vc->vc_cols);
                        fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
                        scr_memsetw((unsigned short *) (vc->vc_origin +
@@ -1913,7 +1912,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
 
                case SCROLL_REDRAW:
                      redraw_down:
-                       fbcon_redraw(vc, p, b - 1, b - t - count,
+                       fbcon_redraw(vc, b - 1, b - t - count,
                                     -count * vc->vc_cols);
                        fbcon_clear(vc, t, 0, count, vc->vc_cols);
                        scr_memsetw((unsigned short *) (vc->vc_origin +
index 94fe529..22158d9 100644 (file)
@@ -548,7 +548,9 @@ static int ep93xxfb_probe(struct platform_device *pdev)
        }
 
        ep93xxfb_set_par(info);
-       clk_prepare_enable(fbi->clk);
+       err = clk_prepare_enable(fbi->clk);
+       if (err)
+               goto failed_check;
 
        err = register_framebuffer(info);
        if (err)
index c6d3111..c473841 100644 (file)
@@ -16,7 +16,8 @@
 #include <linux/fb.h>
 #include <linux/mm.h>
 #include <linux/timer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/upa.h>
index 6fa2108..e41c9fe 100644 (file)
@@ -203,8 +203,8 @@ static int goldfish_fb_probe(struct platform_device *pdev)
        }
 
        fb->irq = platform_get_irq(pdev, 0);
-       if (fb->irq <= 0) {
-               ret = -ENODEV;
+       if (fb->irq < 0) {
+               ret = fb->irq;
                goto err_no_irq;
        }
 
index 9aa15be..d4a9a58 100644 (file)
@@ -12,8 +12,7 @@
 
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
index adf3669..77dedd2 100644 (file)
@@ -613,10 +613,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
        if (var->hsync_len < 1    || var->hsync_len > 64)
                printk(KERN_ERR "%s: invalid hsync_len %d\n",
                        info->fix.id, var->hsync_len);
-       if (var->left_margin > 255)
+       if (var->left_margin < 3  || var->left_margin > 255)
                printk(KERN_ERR "%s: invalid left_margin %d\n",
                        info->fix.id, var->left_margin);
-       if (var->right_margin > 255)
+       if (var->right_margin < 1 || var->right_margin > 255)
                printk(KERN_ERR "%s: invalid right_margin %d\n",
                        info->fix.id, var->right_margin);
        if (var->yres < 1 || var->yres > ymax_mask)
@@ -673,7 +673,8 @@ static int imxfb_init_fbinfo(struct platform_device *pdev)
 
        pr_debug("%s\n",__func__);
 
-       info->pseudo_palette = kmalloc_array(16, sizeof(u32), GFP_KERNEL);
+       info->pseudo_palette = devm_kmalloc_array(&pdev->dev, 16,
+                                                 sizeof(u32), GFP_KERNEL);
        if (!info->pseudo_palette)
                return -ENOMEM;
 
@@ -868,7 +869,6 @@ static int imxfb_probe(struct platform_device *pdev)
        struct imxfb_info *fbi;
        struct lcd_device *lcd;
        struct fb_info *info;
-       struct resource *res;
        struct imx_fb_videomode *m;
        const struct of_device_id *of_id;
        struct device_node *display_np;
@@ -885,10 +885,6 @@ static int imxfb_probe(struct platform_device *pdev)
        if (of_id)
                pdev->id_entry = of_id->data;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
        info = framebuffer_alloc(sizeof(struct imxfb_info), &pdev->dev);
        if (!info)
                return -ENOMEM;
@@ -907,7 +903,7 @@ static int imxfb_probe(struct platform_device *pdev)
        if (!display_np) {
                dev_err(&pdev->dev, "No display defined in devicetree\n");
                ret = -EINVAL;
-               goto failed_of_parse;
+               goto failed_init;
        }
 
        /*
@@ -921,13 +917,13 @@ static int imxfb_probe(struct platform_device *pdev)
        if (!fbi->mode) {
                ret = -ENOMEM;
                of_node_put(display_np);
-               goto failed_of_parse;
+               goto failed_init;
        }
 
        ret = imxfb_of_read_mode(&pdev->dev, display_np, fbi->mode);
        of_node_put(display_np);
        if (ret)
-               goto failed_of_parse;
+               goto failed_init;
 
        /* Calculate maximum bytes used per pixel. In most cases this should
         * be the same as m->bpp/8 */
@@ -940,7 +936,7 @@ static int imxfb_probe(struct platform_device *pdev)
        fbi->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
        if (IS_ERR(fbi->clk_ipg)) {
                ret = PTR_ERR(fbi->clk_ipg);
-               goto failed_getclock;
+               goto failed_init;
        }
 
        /*
@@ -955,25 +951,25 @@ static int imxfb_probe(struct platform_device *pdev)
         */
        ret = clk_prepare_enable(fbi->clk_ipg);
        if (ret)
-               goto failed_getclock;
+               goto failed_init;
        clk_disable_unprepare(fbi->clk_ipg);
 
        fbi->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
        if (IS_ERR(fbi->clk_ahb)) {
                ret = PTR_ERR(fbi->clk_ahb);
-               goto failed_getclock;
+               goto failed_init;
        }
 
        fbi->clk_per = devm_clk_get(&pdev->dev, "per");
        if (IS_ERR(fbi->clk_per)) {
                ret = PTR_ERR(fbi->clk_per);
-               goto failed_getclock;
+               goto failed_init;
        }
 
-       fbi->regs = devm_ioremap_resource(&pdev->dev, res);
+       fbi->regs = devm_platform_ioremap_resource(pdev, 0);
        if (IS_ERR(fbi->regs)) {
                ret = PTR_ERR(fbi->regs);
-               goto failed_ioremap;
+               goto failed_init;
        }
 
        fbi->map_size = PAGE_ALIGN(info->fix.smem_len);
@@ -982,7 +978,7 @@ static int imxfb_probe(struct platform_device *pdev)
        if (!info->screen_buffer) {
                dev_err(&pdev->dev, "Failed to allocate video RAM\n");
                ret = -ENOMEM;
-               goto failed_map;
+               goto failed_init;
        }
 
        info->fix.smem_start = fbi->map_dma;
@@ -1034,18 +1030,11 @@ static int imxfb_probe(struct platform_device *pdev)
 
 failed_lcd:
        unregister_framebuffer(info);
-
 failed_register:
        fb_dealloc_cmap(&info->cmap);
 failed_cmap:
        dma_free_wc(&pdev->dev, fbi->map_size, info->screen_buffer,
                    fbi->map_dma);
-failed_map:
-failed_ioremap:
-failed_getclock:
-       release_mem_region(res->start, resource_size(res));
-failed_of_parse:
-       kfree(info->pseudo_palette);
 failed_init:
        framebuffer_release(info);
        return ret;
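
The unwind labels deleted above fall out of the device-managed conversions earlier in this patch: devm_kmalloc_array() and devm_platform_ioremap_resource() tie each resource's lifetime to the device, so probe-failure paths no longer release them by hand. A minimal hedged sketch of the pattern, with demo names rather than imxfb's:

static int demo_probe(struct platform_device *pdev)
{
	void __iomem *regs;
	u32 *palette;

	/* freed automatically on probe failure or driver unbind */
	palette = devm_kmalloc_array(&pdev->dev, 16, sizeof(u32), GFP_KERNEL);
	if (!palette)
		return -ENOMEM;

	/* platform_get_resource() + devm_ioremap_resource() in one step */
	regs = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(regs))
		return PTR_ERR(regs);

	return 0;	/* no unwind labels needed for devm resources */
}
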
@@ -1062,11 +1051,10 @@ static void imxfb_remove(struct platform_device *pdev)
        fb_dealloc_cmap(&info->cmap);
        dma_free_wc(&pdev->dev, fbi->map_size, info->screen_buffer,
                    fbi->map_dma);
-       kfree(info->pseudo_palette);
        framebuffer_release(info);
 }
 
-static int __maybe_unused imxfb_suspend(struct device *dev)
+static int imxfb_suspend(struct device *dev)
 {
        struct fb_info *info = dev_get_drvdata(dev);
        struct imxfb_info *fbi = info->par;
@@ -1076,7 +1064,7 @@ static int __maybe_unused imxfb_suspend(struct device *dev)
        return 0;
 }
 
-static int __maybe_unused imxfb_resume(struct device *dev)
+static int imxfb_resume(struct device *dev)
 {
        struct fb_info *info = dev_get_drvdata(dev);
        struct imxfb_info *fbi = info->par;
@@ -1086,13 +1074,13 @@ static int __maybe_unused imxfb_resume(struct device *dev)
        return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(imxfb_pm_ops, imxfb_suspend, imxfb_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(imxfb_pm_ops, imxfb_suspend, imxfb_resume);
 
 static struct platform_driver imxfb_driver = {
        .driver         = {
                .name   = DRIVER_NAME,
                .of_match_table = imxfb_of_dev_id,
-               .pm     = &imxfb_pm_ops,
+               .pm     = pm_sleep_ptr(&imxfb_pm_ops),
        },
        .probe          = imxfb_probe,
        .remove_new     = imxfb_remove,
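
DEFINE_SIMPLE_DEV_PM_OPS() plus pm_sleep_ptr() supersedes the SIMPLE_DEV_PM_OPS()/__maybe_unused pairing used before: the callbacks stay syntactically referenced, so there are no unused-function warnings, while pm_sleep_ptr() collapses to NULL when CONFIG_PM_SLEEP is off and the optimizer discards the dead code. A hedged sketch with demo names:

static int demo_suspend(struct device *dev) { return 0; }
static int demo_resume(struct device *dev)  { return 0; }

static DEFINE_SIMPLE_DEV_PM_OPS(demo_pm_ops, demo_suspend, demo_resume);

static struct platform_driver demo_driver = {
	.driver = {
		.name	= "demo",
		/* NULL when CONFIG_PM_SLEEP is disabled */
		.pm	= pm_sleep_ptr(&demo_pm_ops),
	},
};
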
index edfa0a0..79886a2 100644 (file)
@@ -83,11 +83,11 @@ volatile u32 i,count=0; \
 static u32 InitSDRAMRegisters(volatile STG4000REG __iomem *pSTGReg,
                              u32 dwSubSysID, u32 dwRevID)
 {
-       u32 adwSDRAMArgCfg0[] = { 0xa0, 0x80, 0xa0, 0xa0, 0xa0 };
-       u32 adwSDRAMCfg1[] = { 0x8732, 0x8732, 0xa732, 0xa732, 0x8732 };
-       u32 adwSDRAMCfg2[] = { 0x87d2, 0x87d2, 0xa7d2, 0x87d2, 0xa7d2 };
-       u32 adwSDRAMRsh[] = { 36, 39, 40 };
-       u32 adwChipSpeed[] = { 110, 120, 125 };
+       static const u8 adwSDRAMArgCfg0[] = { 0xa0, 0x80, 0xa0, 0xa0, 0xa0 };
+       static const u16 adwSDRAMCfg1[] = { 0x8732, 0x8732, 0xa732, 0xa732, 0x8732 };
+       static const u16 adwSDRAMCfg2[] = { 0x87d2, 0x87d2, 0xa7d2, 0x87d2, 0xa7d2 };
+       static const u8 adwSDRAMRsh[] = { 36, 39, 40 };
+       static const u8 adwChipSpeed[] = { 110, 120, 125 };
        u32 dwMemTypeIdx;
        u32 dwChipSpeedIdx;
 
index 3ffc0a7..89ca482 100644 (file)
@@ -16,8 +16,9 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/mm.h>
-#include <linux/of_device.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/fbio.h>
 
index 61aed7f..c35a747 100644 (file)
@@ -15,9 +15,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/slab.h>
-#if defined(CONFIG_OF)
-#include <linux/of_platform.h>
-#endif
+
 #include "mb862xxfb.h"
 #include "mb862xx_reg.h"
 #include "mb862xxfb_accel.h"
index b5c8fca..9dc347d 100644 (file)
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
-#if defined(CONFIG_OF)
+#include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#endif
+#include <linux/platform_device.h>
+
 #include "mb862xxfb.h"
 #include "mb862xx_reg.h"
 
index 51fbf02..76b50b6 100644 (file)
@@ -519,7 +519,9 @@ static int mmphw_probe(struct platform_device *pdev)
                              "unable to get clk %s\n", mi->clk_name);
                goto failed;
        }
-       clk_prepare_enable(ctrl->clk);
+       ret = clk_prepare_enable(ctrl->clk);
+       if (ret)
+               goto failed;
 
        /* init global regs */
        ctrl_set_default(ctrl);
index ba94a0a..77fce12 100644 (file)
 #include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/sched/signal.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
-#include <linux/of_device.h>
 
 #include <video/omapfb_dss.h>
 #include <video/mipi_display.h>
index 0876962..e2e747c 100644 (file)
@@ -15,7 +15,8 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/fbio.h>
index f8283fc..b27f43b 100644 (file)
@@ -30,9 +30,9 @@
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/nvram.h>
+#include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
 
 #include "macmodes.h"
 #include "platinumfb.h"
index 7f79db8..21e9fd8 100644 (file)
@@ -11,7 +11,7 @@
 #include <linux/fb.h>
 #include <linux/mm.h>
 #include <linux/uaccess.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <asm/fbio.h>
 
index 11c3737..46881a6 100644 (file)
@@ -399,8 +399,8 @@ static int ssd1307fb_init(struct ssd1307fb_par *par)
                /* Enable the PWM */
                pwm_enable(par->pwm);
 
-               dev_dbg(&par->client->dev, "Using PWM%d with a %lluns period.\n",
-                       par->pwm->pwm, pwm_get_period(par->pwm));
+               dev_dbg(&par->client->dev, "Using PWM %s with a %lluns period.\n",
+                       par->pwm->label, pwm_get_period(par->pwm));
        }
 
        /* Set initial contrast */
index 490bd9a..17d61e1 100644 (file)
@@ -8,7 +8,8 @@
 #include <linux/kernel.h>
 #include <linux/fb.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 struct gfb_info {
        struct fb_info          *info;
index 2cab4b9..e64ec7d 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/fb.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <asm/io.h>
 
index 6ec358a..c4e01e8 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/fb.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 
 #include <asm/io.h>
 
index fc3ac23..255eb57 100644 (file)
@@ -17,7 +17,8 @@
 #include <linux/init.h>
 #include <linux/fb.h>
 #include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
 
 #include <asm/io.h>
 #include <asm/fbio.h>
index 2aa3a52..542badd 100644 (file)
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/errno.h>
+#include <linux/platform_device.h>
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/fb.h>
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
 #include <linux/io.h>
 #include <linux/slab.h>
 
index 835f6cc..fa5226c 100644 (file)
@@ -38,11 +38,6 @@ module_param(bbm_block_size, ulong, 0444);
 MODULE_PARM_DESC(bbm_block_size,
                 "Big Block size in bytes. Default is 0 (auto-detection).");
 
-static bool bbm_safe_unplug = true;
-module_param(bbm_safe_unplug, bool, 0444);
-MODULE_PARM_DESC(bbm_safe_unplug,
-            "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
-
 /*
  * virtio-mem currently supports the following modes of operation:
  *
@@ -173,6 +168,13 @@ struct virtio_mem {
                        /* The number of subblocks per Linux memory block. */
                        uint32_t sbs_per_mb;
 
+                       /*
+                        * Some of the Linux memory blocks tracked as "partially
+                        * plugged" are completely unplugged and can be offlined
+                        * and removed -- which previously failed.
+                        */
+                       bool have_unplugged_mb;
+
                        /* Summary of all memory block states. */
                        unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
 
@@ -746,11 +748,15 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
                 * immediately instead of waiting.
                 */
                virtio_mem_retry(vm);
-       } else {
-               dev_dbg(&vm->vdev->dev,
-                       "offlining and removing memory failed: %d\n", rc);
+               return 0;
        }
-       return rc;
+       dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc);
+       /*
+        * We don't really expect this to fail, because we fake-offlined all
+        * memory already. But it could fail in corner cases.
+        */
+       WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY);
+       return rc == -ENOMEM ? -ENOMEM : -EBUSY;
 }
 
 /*
@@ -767,6 +773,34 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
 }
 
 /*
+ * Try (offlining and) removing memory from Linux in case all subblocks are
+ * unplugged. Can be called on online and offline memory blocks.
+ *
+ * May modify the state of memory blocks in virtio-mem.
+ */
+static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm,
+                                                 unsigned long mb_id)
+{
+       int rc;
+
+       /*
+        * Once all subblocks of a memory block were unplugged, offline and
+        * remove it.
+        */
+       if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+               return 0;
+
+       /* offline_and_remove_memory() works for online and offline memory. */
+       mutex_unlock(&vm->hotplug_mutex);
+       rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
+       mutex_lock(&vm->hotplug_mutex);
+       if (!rc)
+               virtio_mem_sbm_set_mb_state(vm, mb_id,
+                                           VIRTIO_MEM_SBM_MB_UNUSED);
+       return rc;
+}
+
+/*
  * See virtio_mem_offline_and_remove_memory(): Try to offline and remove
 * all Linux memory blocks covered by the big block.
  */
@@ -1155,7 +1189,8 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
  * Try to allocate a range, marking pages fake-offline, effectively
  * fake-offlining them.
  */
-static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
+static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn,
+                                  unsigned long nr_pages)
 {
        const bool is_movable = is_zone_movable_page(pfn_to_page(pfn));
        int rc, retry_count;
@@ -1168,6 +1203,14 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
         * some guarantees.
         */
        for (retry_count = 0; retry_count < 5; retry_count++) {
+               /*
+                * If the config changed, stop immediately and go back to the
+                * main loop: avoid trying to keep unplugging if the device
+                * might have decided to not remove any more memory.
+                */
+               if (atomic_read(&vm->config_changed))
+                       return -EAGAIN;
+
                rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
                                        GFP_KERNEL);
                if (rc == -ENOMEM)
@@ -1917,7 +1960,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
        start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
                             sb_id * vm->sbm.sb_size);
 
-       rc = virtio_mem_fake_offline(start_pfn, nr_pages);
+       rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages);
        if (rc)
                return rc;
 
@@ -1989,20 +2032,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
        }
 
 unplugged:
-       /*
-        * Once all subblocks of a memory block were unplugged, offline and
-        * remove it. This will usually not fail, as no memory is in use
-        * anymore - however some other notifiers might NACK the request.
-        */
-       if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
-               mutex_unlock(&vm->hotplug_mutex);
-               rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
-               mutex_lock(&vm->hotplug_mutex);
-               if (!rc)
-                       virtio_mem_sbm_set_mb_state(vm, mb_id,
-                                                   VIRTIO_MEM_SBM_MB_UNUSED);
-       }
-
+       rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id);
+       if (rc)
+               vm->sbm.have_unplugged_mb = 1;
+       /* Ignore errors, this is not critical. We'll retry later. */
        return 0;
 }
 
@@ -2111,38 +2144,32 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
                         VIRTIO_MEM_BBM_BB_ADDED))
                return -EINVAL;
 
-       if (bbm_safe_unplug) {
-               /*
-                * Start by fake-offlining all memory. Once we marked the device
-                * block as fake-offline, all newly onlined memory will
-                * automatically be kept fake-offline. Protect from concurrent
-                * onlining/offlining until we have a consistent state.
-                */
-               mutex_lock(&vm->hotplug_mutex);
-               virtio_mem_bbm_set_bb_state(vm, bb_id,
-                                           VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
+       /*
+        * Start by fake-offlining all memory. Once we marked the device
+        * block as fake-offline, all newly onlined memory will
+        * automatically be kept fake-offline. Protect from concurrent
+        * onlining/offlining until we have a consistent state.
+        */
+       mutex_lock(&vm->hotplug_mutex);
+       virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
 
-               for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
-                       page = pfn_to_online_page(pfn);
-                       if (!page)
-                               continue;
+       for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+               page = pfn_to_online_page(pfn);
+               if (!page)
+                       continue;
 
-                       rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
-                       if (rc) {
-                               end_pfn = pfn;
-                               goto rollback_safe_unplug;
-                       }
+               rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION);
+               if (rc) {
+                       end_pfn = pfn;
+                       goto rollback;
                }
-               mutex_unlock(&vm->hotplug_mutex);
        }
+       mutex_unlock(&vm->hotplug_mutex);
 
        rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
        if (rc) {
-               if (bbm_safe_unplug) {
-                       mutex_lock(&vm->hotplug_mutex);
-                       goto rollback_safe_unplug;
-               }
-               return rc;
+               mutex_lock(&vm->hotplug_mutex);
+               goto rollback;
        }
 
        rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
@@ -2154,7 +2181,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
                                            VIRTIO_MEM_BBM_BB_UNUSED);
        return rc;
 
-rollback_safe_unplug:
+rollback:
        for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
                page = pfn_to_online_page(pfn);
                if (!page)
@@ -2260,12 +2287,13 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
 
 /*
  * Try to unplug all blocks that couldn't be unplugged before, for example,
- * because the hypervisor was busy.
+ * because the hypervisor was busy. Further, offline and remove any memory
+ * blocks where we previously failed.
  */
-static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
+static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
 {
        unsigned long id;
-       int rc;
+       int rc = 0;
 
        if (!vm->in_sbm) {
                virtio_mem_bbm_for_each_bb(vm, id,
@@ -2287,6 +2315,27 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
                                            VIRTIO_MEM_SBM_MB_UNUSED);
        }
 
+       if (!vm->sbm.have_unplugged_mb)
+               return 0;
+
+       /*
+        * Let's retry (offlining and) removing completely unplugged Linux
+        * memory blocks.
+        */
+       vm->sbm.have_unplugged_mb = false;
+
+       mutex_lock(&vm->hotplug_mutex);
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL)
+               rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL)
+               rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+       virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
+               rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+       mutex_unlock(&vm->hotplug_mutex);
+
+       if (rc)
+               vm->sbm.have_unplugged_mb = true;
+       /* Ignore errors, this is not critical. We'll retry later. */
        return 0;
 }
 
@@ -2368,9 +2417,9 @@ retry:
                virtio_mem_refresh_config(vm);
        }
 
-       /* Unplug any leftovers from previous runs */
+       /* Clean up any leftovers from previous runs */
        if (!rc)
-               rc = virtio_mem_unplug_pending_mb(vm);
+               rc = virtio_mem_cleanup_pending_mb(vm);
 
        if (!rc && vm->requested_size != vm->plugged_size) {
                if (vm->requested_size > vm->plugged_size) {
@@ -2382,6 +2431,13 @@ retry:
                }
        }
 
+       /*
+        * Keep retrying to offline and remove completely unplugged Linux
+        * memory blocks.
+        */
+       if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
+               rc = -EBUSY;
+
        switch (rc) {
        case 0:
                vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
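
Taken together, the virtio-mem hunks implement a deliberately lazy error path: when offlining and removing a fully unplugged memory block fails, the driver only records the fact in sbm.have_unplugged_mb, and the next main run converts the still-set flag into -EBUSY so the existing retry machinery fires again. A simplified sketch of that control flow (retry_timer/retry_timer_ms are driver fields assumed from context, and the timer plumbing is reduced to its essentials):

#include <linux/jiffies.h>
#include <linux/timer.h>

static void virtio_mem_retry_sketch(struct virtio_mem *vm)
{
        /* also retries offlining/removing fully unplugged blocks */
        int rc = virtio_mem_cleanup_pending_mb(vm);

        /* leftover work? report -EBUSY so we run again later */
        if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
                rc = -EBUSY;

        if (rc == -EBUSY)
                mod_timer(&vm->retry_timer,
                          jiffies + msecs_to_jiffies(vm->retry_timer_ms));
}
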
index a46a4a2..97760f6 100644 (file)
@@ -607,9 +607,8 @@ static void virtio_mmio_release_dev(struct device *_d)
        struct virtio_device *vdev =
                        container_of(_d, struct virtio_device, dev);
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-       struct platform_device *pdev = vm_dev->pdev;
 
-       devm_kfree(&pdev->dev, vm_dev);
+       kfree(vm_dev);
 }
 
 /* Platform device */
@@ -620,7 +619,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
        unsigned long magic;
        int rc;
 
-       vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
+       vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL);
        if (!vm_dev)
                return -ENOMEM;
 
index a6c86f9..c2524a7 100644 (file)
@@ -557,8 +557,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
 
        pci_set_master(pci_dev);
 
-       vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
-
        rc = register_virtio_device(&vp_dev->vdev);
        reg_dev = vp_dev;
        if (rc)
index 2257f1b..d9cbb02 100644 (file)
@@ -223,6 +223,7 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
        vp_dev->config_vector = vp_config_vector;
        vp_dev->setup_vq = setup_vq;
        vp_dev->del_vq = del_vq;
+       vp_dev->is_legacy = true;
 
        return 0;
 }
index 989e2d7..961161d 100644 (file)
@@ -393,11 +393,13 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
        cb.callback = virtio_vdpa_config_cb;
        cb.private = vd_dev;
        ops->set_config_cb(vdpa, &cb);
+       kfree(masks);
 
        return 0;
 
 err_setup_vq:
        virtio_vdpa_del_vqs(vdev);
+       kfree(masks);
        return err;
 }
 
index d5d7c40..d43153f 100644 (file)
@@ -269,6 +269,13 @@ config XEN_PRIVCMD
          disaggregated Xen setups this driver might be needed for other
          domains, too.
 
+config XEN_PRIVCMD_IRQFD
+       bool "Xen irqfd support"
+       depends on XEN_PRIVCMD && XEN_VIRTIO && EVENTFD
+       help
+         Using the irqfd mechanism, a virtio backend running in a daemon
+         can speed up interrupt injection into a guest.
+
 config XEN_ACPI_PROCESSOR
        tristate "Xen ACPI processor"
        depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ
index c7715f8..3bdd5b5 100644 (file)
@@ -112,6 +112,7 @@ struct irq_info {
        unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
        u64 eoi_time;           /* Time in jiffies when to EOI. */
        raw_spinlock_t lock;
+       bool is_static;           /* Is event channel static */
 
        union {
                unsigned short virq;
@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq)
        irq_free_desc(irq);
 }
 
-static void xen_evtchn_close(evtchn_port_t port)
-{
-       struct evtchn_close close;
-
-       close.port = port;
-       if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-               BUG();
-}
-
 /* Not called for lateeoi events. */
 static void event_handler_exit(struct irq_info *info)
 {
@@ -982,7 +974,8 @@ static void __unbind_from_irq(unsigned int irq)
                unsigned int cpu = cpu_from_irq(irq);
                struct xenbus_device *dev;
 
-               xen_evtchn_close(evtchn);
+               if (!info->is_static)
+                       xen_evtchn_close(evtchn);
 
                switch (type_from_irq(irq)) {
                case IRQT_VIRQ:
@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
 }
 EXPORT_SYMBOL_GPL(xen_set_irq_priority);
 
-int evtchn_make_refcounted(evtchn_port_t evtchn)
+int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
 {
        int irq = get_evtchn_to_irq(evtchn);
        struct irq_info *info;
@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
        WARN_ON(info->refcnt != -1);
 
        info->refcnt = 1;
+       info->is_static = is_static;
 
        return 0;
 }
index c99415a..9139a73 100644 (file)
@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u)
        return 0;
 }
 
-static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
+static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
+                              bool is_static)
 {
        struct user_evtchn *evtchn;
-       struct evtchn_close close;
        int rc = 0;
 
        /*
@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
        if (rc < 0)
                goto err;
 
-       rc = evtchn_make_refcounted(port);
+       rc = evtchn_make_refcounted(port, is_static);
        return rc;
 
 err:
        /* bind failed, should close the port now */
-       close.port = port;
-       if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-               BUG();
+       if (!is_static)
+               xen_evtchn_close(port);
+
        del_evtchn(u, evtchn);
        return rc;
 }
@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file,
                if (rc != 0)
                        break;
 
-               rc = evtchn_bind_to_user(u, bind_virq.port);
+               rc = evtchn_bind_to_user(u, bind_virq.port, false);
                if (rc == 0)
                        rc = bind_virq.port;
                break;
@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file,
                if (rc != 0)
                        break;
 
-               rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+               rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false);
                if (rc == 0)
                        rc = bind_interdomain.local_port;
                break;
@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file,
                if (rc != 0)
                        break;
 
-               rc = evtchn_bind_to_user(u, alloc_unbound.port);
+               rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
                if (rc == 0)
                        rc = alloc_unbound.port;
                break;
@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file,
                break;
        }
 
+       case IOCTL_EVTCHN_BIND_STATIC: {
+               struct ioctl_evtchn_bind bind;
+               struct user_evtchn *evtchn;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               rc = -EISCONN;
+               evtchn = find_evtchn(u, bind.port);
+               if (evtchn)
+                       break;
+
+               rc = evtchn_bind_to_user(u, bind.port, true);
+               break;
+       }
+
        case IOCTL_EVTCHN_NOTIFY: {
                struct ioctl_evtchn_notify notify;
                struct user_evtchn *evtchn;
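
IOCTL_EVTCHN_BIND_STATIC is the userspace-visible half of the is_static plumbing above: a port bound this way keeps its event channel open (EVTCHNOP_close is skipped) when the binding goes away. A hypothetical userspace sketch, assuming the usual /dev/xen/evtchn node and uapi header:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xen/evtchn.h>

static int bind_static_evtchn(unsigned int port)
{
        struct ioctl_evtchn_bind bind = { .port = port };
        int fd = open("/dev/xen/evtchn", O_RDWR);

        if (fd < 0)
                return -1;
        if (ioctl(fd, IOCTL_EVTCHN_BIND_STATIC, &bind) < 0) {
                close(fd);
                return -1;
        }
        return fd;      /* reads on fd now deliver events for the port */
}
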
index 9784a77..76f6f26 100644 (file)
@@ -303,6 +303,8 @@ static struct device_node *xen_dt_get_node(struct device *dev)
                while (!pci_is_root_bus(bus))
                        bus = bus->parent;
 
+               if (!bus->bridge->parent)
+                       return NULL;
                return of_node_get(bus->bridge->parent->of_node);
        }
 
index e1ec725..35659bf 100644 (file)
@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
 static void gnttab_handle_deferred(struct timer_list *);
 static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
 
+static atomic64_t deferred_count;
+static atomic64_t leaked_count;
+static unsigned int free_per_iteration = 10;
+module_param(free_per_iteration, uint, 0600);
+
 static void gnttab_handle_deferred(struct timer_list *unused)
 {
-       unsigned int nr = 10;
+       unsigned int nr = READ_ONCE(free_per_iteration);
+       const bool ignore_limit = nr == 0;
        struct deferred_entry *first = NULL;
        unsigned long flags;
+       size_t freed = 0;
 
        spin_lock_irqsave(&gnttab_list_lock, flags);
-       while (nr--) {
+       while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
                struct deferred_entry *entry
                        = list_first_entry(&deferred_list,
                                           struct deferred_entry, list);
@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
                list_del(&entry->list);
                spin_unlock_irqrestore(&gnttab_list_lock, flags);
                if (_gnttab_end_foreign_access_ref(entry->ref)) {
+                       uint64_t ret = atomic64_dec_return(&deferred_count);
+
                        put_free_entry(entry->ref);
-                       pr_debug("freeing g.e. %#x (pfn %#lx)\n",
-                                entry->ref, page_to_pfn(entry->page));
+                       pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
+                                entry->ref, page_to_pfn(entry->page),
+                                (unsigned long long)ret);
                        put_page(entry->page);
+                       freed++;
                        kfree(entry);
                        entry = NULL;
                } else {
@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
                spin_lock_irqsave(&gnttab_list_lock, flags);
                if (entry)
                        list_add_tail(&entry->list, &deferred_list);
-               else if (list_empty(&deferred_list))
-                       break;
        }
-       if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
+       if (list_empty(&deferred_list))
+               WARN_ON(atomic64_read(&deferred_count));
+       else if (!timer_pending(&deferred_timer)) {
                deferred_timer.expires = jiffies + HZ;
                add_timer(&deferred_timer);
        }
        spin_unlock_irqrestore(&gnttab_list_lock, flags);
+       pr_debug("Freed %zu references\n", freed);
 }
 
 static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
 {
        struct deferred_entry *entry;
        gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
-       const char *what = KERN_WARNING "leaking";
+       uint64_t leaked, deferred;
 
        entry = kmalloc(sizeof(*entry), gfp);
        if (!page) {
@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
                        add_timer(&deferred_timer);
                }
                spin_unlock_irqrestore(&gnttab_list_lock, flags);
-               what = KERN_DEBUG "deferring";
+               deferred = atomic64_inc_return(&deferred_count);
+               leaked = atomic64_read(&leaked_count);
+               pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+                        ref, page ? page_to_pfn(page) : -1, deferred, leaked);
+       } else {
+               deferred = atomic64_read(&deferred_count);
+               leaked = atomic64_inc_return(&leaked_count);
+               pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+                       ref, page ? page_to_pfn(page) : -1, deferred, leaked);
        }
-       printk("%s g.e. %#x (pfn %#lx)\n",
-              what, ref, page ? page_to_pfn(page) : -1);
 }
 
 int gnttab_try_end_foreign_access(grant_ref_t ref)
@@ -1026,7 +1044,7 @@ EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);
 
 /**
  * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
- * @nr_pages; number of pages to free
+ * @nr_pages: number of pages to free
  * @pages: the pages
  */
 void gnttab_free_pages(int nr_pages, struct page **pages)
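
The reworked loop bounds how much deferred freeing a single timer tick may do, makes the bound runtime-tunable through free_per_iteration (0 meaning unbounded), and keeps precise counts of deferred versus leaked entries. The skeleton of the pattern, reduced to invented generic names:

#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/timer.h>

struct todo_entry {
        struct list_head list;
};

static LIST_HEAD(todo_list);
static DEFINE_SPINLOCK(todo_lock);
static unsigned int budget = 10;        /* 0 == no limit, like the module param */

static void todo_worker(struct timer_list *t)
{
        unsigned int nr = READ_ONCE(budget);
        const bool ignore_limit = nr == 0;
        unsigned long flags;

        spin_lock_irqsave(&todo_lock, flags);
        while ((ignore_limit || nr--) && !list_empty(&todo_list)) {
                struct todo_entry *e = list_first_entry(&todo_list,
                                                struct todo_entry, list);

                list_del(&e->list);
                spin_unlock_irqrestore(&todo_lock, flags);
                kfree(e);       /* stands in for the real per-entry work */
                spin_lock_irqsave(&todo_lock, flags);
        }
        /* re-arm while entries remain */
        if (!list_empty(&todo_list) && !timer_pending(t)) {
                t->expires = jiffies + HZ;
                add_timer(t);
        }
        spin_unlock_irqrestore(&todo_lock, flags);
}
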
index f447cd3..f00ad5f 100644 (file)
@@ -9,11 +9,16 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
+#include <linux/eventfd.h>
+#include <linux/file.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/workqueue.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -833,6 +838,263 @@ out:
        return rc;
 }
 
+#ifdef CONFIG_XEN_PRIVCMD_IRQFD
+/* Irqfd support */
+static struct workqueue_struct *irqfd_cleanup_wq;
+static DEFINE_MUTEX(irqfds_lock);
+static LIST_HEAD(irqfds_list);
+
+struct privcmd_kernel_irqfd {
+       struct xen_dm_op_buf xbufs;
+       domid_t dom;
+       bool error;
+       struct eventfd_ctx *eventfd;
+       struct work_struct shutdown;
+       wait_queue_entry_t wait;
+       struct list_head list;
+       poll_table pt;
+};
+
+static void irqfd_deactivate(struct privcmd_kernel_irqfd *kirqfd)
+{
+       lockdep_assert_held(&irqfds_lock);
+
+       list_del_init(&kirqfd->list);
+       queue_work(irqfd_cleanup_wq, &kirqfd->shutdown);
+}
+
+static void irqfd_shutdown(struct work_struct *work)
+{
+       struct privcmd_kernel_irqfd *kirqfd =
+               container_of(work, struct privcmd_kernel_irqfd, shutdown);
+       u64 cnt;
+
+       eventfd_ctx_remove_wait_queue(kirqfd->eventfd, &kirqfd->wait, &cnt);
+       eventfd_ctx_put(kirqfd->eventfd);
+       kfree(kirqfd);
+}
+
+static void irqfd_inject(struct privcmd_kernel_irqfd *kirqfd)
+{
+       u64 cnt;
+       long rc;
+
+       eventfd_ctx_do_read(kirqfd->eventfd, &cnt);
+
+       xen_preemptible_hcall_begin();
+       rc = HYPERVISOR_dm_op(kirqfd->dom, 1, &kirqfd->xbufs);
+       xen_preemptible_hcall_end();
+
+       /* Don't repeat the error message for consecutive failures */
+       if (rc && !kirqfd->error) {
+               pr_err("Failed to configure irq for guest domain: %d\n",
+                      kirqfd->dom);
+       }
+
+       kirqfd->error = rc;
+}
+
+static int
+irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
+{
+       struct privcmd_kernel_irqfd *kirqfd =
+               container_of(wait, struct privcmd_kernel_irqfd, wait);
+       __poll_t flags = key_to_poll(key);
+
+       if (flags & EPOLLIN)
+               irqfd_inject(kirqfd);
+
+       if (flags & EPOLLHUP) {
+               mutex_lock(&irqfds_lock);
+               irqfd_deactivate(kirqfd);
+               mutex_unlock(&irqfds_lock);
+       }
+
+       return 0;
+}
+
+static void
+irqfd_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
+{
+       struct privcmd_kernel_irqfd *kirqfd =
+               container_of(pt, struct privcmd_kernel_irqfd, pt);
+
+       add_wait_queue_priority(wqh, &kirqfd->wait);
+}
+
+static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd)
+{
+       struct privcmd_kernel_irqfd *kirqfd, *tmp;
+       __poll_t events;
+       struct fd f;
+       void *dm_op;
+       int ret;
+
+       kirqfd = kzalloc(sizeof(*kirqfd) + irqfd->size, GFP_KERNEL);
+       if (!kirqfd)
+               return -ENOMEM;
+       dm_op = kirqfd + 1;
+
+       if (copy_from_user(dm_op, irqfd->dm_op, irqfd->size)) {
+               ret = -EFAULT;
+               goto error_kfree;
+       }
+
+       kirqfd->xbufs.size = irqfd->size;
+       set_xen_guest_handle(kirqfd->xbufs.h, dm_op);
+       kirqfd->dom = irqfd->dom;
+       INIT_WORK(&kirqfd->shutdown, irqfd_shutdown);
+
+       f = fdget(irqfd->fd);
+       if (!f.file) {
+               ret = -EBADF;
+               goto error_kfree;
+       }
+
+       kirqfd->eventfd = eventfd_ctx_fileget(f.file);
+       if (IS_ERR(kirqfd->eventfd)) {
+               ret = PTR_ERR(kirqfd->eventfd);
+               goto error_fd_put;
+       }
+
+       /*
+        * Install our own custom wake-up handling so we are notified via a
+        * callback whenever someone signals the underlying eventfd.
+        */
+       init_waitqueue_func_entry(&kirqfd->wait, irqfd_wakeup);
+       init_poll_funcptr(&kirqfd->pt, irqfd_poll_func);
+
+       mutex_lock(&irqfds_lock);
+
+       list_for_each_entry(tmp, &irqfds_list, list) {
+               if (kirqfd->eventfd == tmp->eventfd) {
+                       ret = -EBUSY;
+                       mutex_unlock(&irqfds_lock);
+                       goto error_eventfd;
+               }
+       }
+
+       list_add_tail(&kirqfd->list, &irqfds_list);
+       mutex_unlock(&irqfds_lock);
+
+       /*
+        * Check if there was an event already pending on the eventfd before we
+        * registered, and trigger it as if we didn't miss it.
+        */
+       events = vfs_poll(f.file, &kirqfd->pt);
+       if (events & EPOLLIN)
+               irqfd_inject(kirqfd);
+
+       /*
+        * Do not drop the file until the kirqfd is fully initialized, otherwise
+        * we might race against the EPOLLHUP.
+        */
+       fdput(f);
+       return 0;
+
+error_eventfd:
+       eventfd_ctx_put(kirqfd->eventfd);
+
+error_fd_put:
+       fdput(f);
+
+error_kfree:
+       kfree(kirqfd);
+       return ret;
+}
+
+static int privcmd_irqfd_deassign(struct privcmd_irqfd *irqfd)
+{
+       struct privcmd_kernel_irqfd *kirqfd;
+       struct eventfd_ctx *eventfd;
+
+       eventfd = eventfd_ctx_fdget(irqfd->fd);
+       if (IS_ERR(eventfd))
+               return PTR_ERR(eventfd);
+
+       mutex_lock(&irqfds_lock);
+
+       list_for_each_entry(kirqfd, &irqfds_list, list) {
+               if (kirqfd->eventfd == eventfd) {
+                       irqfd_deactivate(kirqfd);
+                       break;
+               }
+       }
+
+       mutex_unlock(&irqfds_lock);
+
+       eventfd_ctx_put(eventfd);
+
+       /*
+        * Block until we know all outstanding shutdown jobs have completed so
+        * that we guarantee there will not be any more interrupts once this
+        * deassign function returns.
+        */
+       flush_workqueue(irqfd_cleanup_wq);
+
+       return 0;
+}
+
+static long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
+{
+       struct privcmd_data *data = file->private_data;
+       struct privcmd_irqfd irqfd;
+
+       if (copy_from_user(&irqfd, udata, sizeof(irqfd)))
+               return -EFAULT;
+
+       /* No other flags should be set */
+       if (irqfd.flags & ~PRIVCMD_IRQFD_FLAG_DEASSIGN)
+               return -EINVAL;
+
+       /* If restriction is in place, check the domid matches */
+       if (data->domid != DOMID_INVALID && data->domid != irqfd.dom)
+               return -EPERM;
+
+       if (irqfd.flags & PRIVCMD_IRQFD_FLAG_DEASSIGN)
+               return privcmd_irqfd_deassign(&irqfd);
+
+       return privcmd_irqfd_assign(&irqfd);
+}
+
+static int privcmd_irqfd_init(void)
+{
+       irqfd_cleanup_wq = alloc_workqueue("privcmd-irqfd-cleanup", 0, 0);
+       if (!irqfd_cleanup_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void privcmd_irqfd_exit(void)
+{
+       struct privcmd_kernel_irqfd *kirqfd, *tmp;
+
+       mutex_lock(&irqfds_lock);
+
+       list_for_each_entry_safe(kirqfd, tmp, &irqfds_list, list)
+               irqfd_deactivate(kirqfd);
+
+       mutex_unlock(&irqfds_lock);
+
+       destroy_workqueue(irqfd_cleanup_wq);
+}
+#else
+static inline long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
+{
+       return -EOPNOTSUPP;
+}
+
+static inline int privcmd_irqfd_init(void)
+{
+       return 0;
+}
+
+static inline void privcmd_irqfd_exit(void)
+{
+}
+#endif /* CONFIG_XEN_PRIVCMD_IRQFD */
+
 static long privcmd_ioctl(struct file *file,
                          unsigned int cmd, unsigned long data)
 {
@@ -868,6 +1130,10 @@ static long privcmd_ioctl(struct file *file,
                ret = privcmd_ioctl_mmap_resource(file, udata);
                break;
 
+       case IOCTL_PRIVCMD_IRQFD:
+               ret = privcmd_ioctl_irqfd(file, udata);
+               break;
+
        default:
                break;
        }
@@ -992,15 +1258,27 @@ static int __init privcmd_init(void)
        err = misc_register(&xen_privcmdbuf_dev);
        if (err != 0) {
                pr_err("Could not register Xen hypercall-buf device\n");
-               misc_deregister(&privcmd_dev);
-               return err;
+               goto err_privcmdbuf;
+       }
+
+       err = privcmd_irqfd_init();
+       if (err != 0) {
+               pr_err("irqfd init failed\n");
+               goto err_irqfd;
        }
 
        return 0;
+
+err_irqfd:
+       misc_deregister(&xen_privcmdbuf_dev);
+err_privcmdbuf:
+       misc_deregister(&privcmd_dev);
+       return err;
 }
 
 static void __exit privcmd_exit(void)
 {
+       privcmd_irqfd_exit();
        misc_deregister(&privcmd_dev);
        misc_deregister(&xen_privcmdbuf_dev);
 }
index 9cb61db..2967039 100644 (file)
@@ -473,11 +473,8 @@ static int xen_upload_processor_pm_data(void)
                if (!_pr)
                        continue;
 
-               if (!pr_backup) {
-                       pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
-                       if (pr_backup)
-                               memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
-               }
+               if (!pr_backup)
+                       pr_backup = kmemdup(_pr, sizeof(*_pr), GFP_KERNEL);
                (void)upload_pm_data(_pr);
        }
 
index d873abe..fc1557d 100644 (file)
@@ -21,8 +21,6 @@ struct xen_pcibk_config_quirk {
 int xen_pcibk_config_quirks_add_field(struct pci_dev *dev, struct config_field
                                    *field);
 
-int xen_pcibk_config_quirks_remove_field(struct pci_dev *dev, int reg);
-
 int xen_pcibk_config_quirks_init(struct pci_dev *dev);
 
 void xen_pcibk_config_field_free(struct config_field *field);
index 9a64196..f9599ed 100644 (file)
@@ -201,6 +201,3 @@ static inline void xen_pcibk_lateeoi(struct xen_pcibk_device *pdev,
 int xen_pcibk_xenbus_register(void);
 void xen_pcibk_xenbus_unregister(void);
 #endif
-
-/* Handles shared IRQs that can to device domain and control domain. */
-void xen_pcibk_irq_handler(struct pci_dev *dev, int reset);
index 58b732d..639bf62 100644 (file)
@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused)
 
 static int __init xenbus_probe_initcall(void)
 {
+       if (!xen_domain())
+               return -ENODEV;
+
        /*
         * Probe XenBus here in the XS_PV case, and also XS_HVM unless we
         * need to wait for the platform PCI device to come up or
index 3f3836c..fcb335b 100644 (file)
@@ -429,7 +429,7 @@ static void xenbus_check_frontend(char *class, char *dev)
                printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
                                frontend, xenbus_strstate(fe_state));
                backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
-               if (!backend || IS_ERR(backend))
+               if (IS_ERR_OR_NULL(backend))
                        goto out;
                err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
                if (err == 1)
index 12e02eb..028a182 100644 (file)
@@ -840,8 +840,8 @@ void xs_suspend(void)
 {
        xs_suspend_enter();
 
-       down_write(&xs_watch_rwsem);
        mutex_lock(&xs_response_mutex);
+       down_write(&xs_watch_rwsem);
 }
 
 void xs_resume(void)
@@ -866,8 +866,8 @@ void xs_resume(void)
 
 void xs_suspend_cancel(void)
 {
-       mutex_unlock(&xs_response_mutex);
        up_write(&xs_watch_rwsem);
+       mutex_unlock(&xs_response_mutex);
 
        xs_suspend_exit();
 }
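
The xs.c reordering makes lock handling symmetric: suspend now acquires xs_response_mutex before xs_watch_rwsem, and cancel releases them in exactly the reverse order. Keeping one global acquisition order across all paths is the standard way to rule out ABBA deadlocks, sketched here as a comment:

/*
 * Two paths taking two locks in opposite orders can deadlock:
 *
 *   thread 0                          thread 1
 *   --------                          --------
 *   mutex_lock(&xs_response_mutex);
 *                                     down_write(&xs_watch_rwsem);
 *   down_write(&xs_watch_rwsem);      mutex_lock(&xs_response_mutex);
 *       ... blocks on thread 1 ...        ... blocks on thread 0 ...
 *
 * With a single agreed order (mutex first, rwsem second) the cycle
 * cannot form, and releasing in reverse keeps the pairing easy to audit.
 */
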
index fa3c83d..077114c 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/types.h>
 #include <linux/zorro.h>
 
+#include "zorro.h"
 
 struct zorro_prod_info {
        __u16 prod;
index 0c51889..29281b7 100644 (file)
@@ -46,8 +46,8 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
  * NOTE: these are set after open so only reflect 9p client not
  * underlying file system on server.
  */
-static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
-       int s_cache, unsigned int f_flags)
+static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags,
+       unsigned int s_cache, unsigned int f_flags)
 {
        if (fid->qid.type != P9_QTFILE)
                return;
@@ -57,7 +57,7 @@ static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
           (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
                fid->mode |= P9L_DIRECT; /* no read or write cache */
        } else if ((!(s_cache & CACHE_WRITEBACK)) ||
-                               (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) {
+                               (f_flags & O_DSYNC) || (s_flags & V9FS_SYNC)) {
                fid->mode |= P9L_NOWRITECACHE;
        }
 }
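
The second fid.h change swaps a bitwise OR for the logical OR the condition was meant to use. With plain bit-mask operands the two happen to yield the same truth value here, but '|' always evaluates both sides and stops matching the author's intent as soon as an operand gains side effects; a contrived illustration:

static bool slow_check_a(void);
static bool slow_check_b(void);

static void example(void)
{
        if (slow_check_a() | slow_check_b())    /* both calls always run */
                /* ... */;

        if (slow_check_a() || slow_check_b())   /* may skip the second call */
                /* ... */;
}
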
index c7f774f..d525957 100644 (file)
@@ -545,8 +545,6 @@ void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses)
        p9_client_begin_disconnect(v9ses->clnt);
 }
 
-extern int v9fs_error_init(void);
-
 static struct kobject *v9fs_kobj;
 
 #ifdef CONFIG_9P_FSCACHE
index 06a2514..698c43d 100644 (file)
@@ -108,7 +108,7 @@ enum p9_cache_bits {
 
 struct v9fs_session_info {
        /* options */
-       unsigned char flags;
+       unsigned int flags;
        unsigned char nodev;
        unsigned short debug;
        unsigned int afid;
index 45b684b..4102759 100644 (file)
@@ -208,7 +208,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
        struct p9_fid *fid;
        __le32 version;
        loff_t i_size;
-       int retval = 0;
+       int retval = 0, put_err;
 
        fid = filp->private_data;
        p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
@@ -221,7 +221,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
                spin_lock(&inode->i_lock);
                hlist_del(&fid->ilist);
                spin_unlock(&inode->i_lock);
-               retval = p9_fid_put(fid);
+               put_err = p9_fid_put(fid);
+               retval = retval < 0 ? retval : put_err;
        }
 
        if ((filp->f_mode & FMODE_WRITE)) {
index 2996fb0..11cd8d2 100644 (file)
@@ -505,10 +505,7 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
        p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);
 
        if (!(v9ses->cache & CACHE_WRITEBACK)) {
-               p9_debug(P9_DEBUG_CACHE, "(no mmap mode)");
-               if (vma->vm_flags & VM_MAYSHARE)
-                       return -ENODEV;
-               invalidate_inode_pages2(filp->f_mapping);
+               p9_debug(P9_DEBUG_CACHE, "(read-only mmap mode)");
                return generic_file_readonly_mmap(filp, vma);
        }
 
index 36b466e..0d28ecf 100644 (file)
@@ -163,7 +163,6 @@ int v9fs_uflags2omode(int uflags, int extended)
 {
        int ret;
 
-       ret = 0;
        switch (uflags&3) {
        default:
        case O_RDONLY:
@@ -261,7 +260,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
        inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
        inode->i_blocks = 0;
        inode->i_rdev = rdev;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_mapping->a_ops = &v9fs_addr_operations;
        inode->i_private = NULL;
 
@@ -603,7 +602,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 
        p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
 
-       err = 0;
        name = dentry->d_name.name;
        dfid = v9fs_parent_fid(dentry);
        if (IS_ERR(dfid)) {
@@ -815,8 +813,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
        if (!(flags & O_CREAT) || d_really_is_positive(dentry))
                return finish_no_open(file, res);
 
-       err = 0;
-
        v9ses = v9fs_inode2v9ses(dir);
        perm = unixmode2p9mode(v9ses, mode);
        p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
@@ -912,7 +908,6 @@ v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                return -EINVAL;
 
        p9_debug(P9_DEBUG_VFS, "\n");
-       retval = 0;
        old_inode = d_inode(old_dentry);
        new_inode = d_inode(new_dentry);
        v9ses = v9fs_inode2v9ses(old_inode);
@@ -1016,7 +1011,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
        p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        v9ses = v9fs_dentry2v9ses(dentry);
        if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-               generic_fillattr(&nop_mnt_idmap, inode, stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
                return 0;
        } else if (v9ses->cache & CACHE_WRITEBACK) {
                if (S_ISREG(inode->i_mode)) {
@@ -1037,7 +1032,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
                return PTR_ERR(st);
 
        v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
-       generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
 
        p9stat_free(st);
        kfree(st);
@@ -1066,7 +1061,6 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
        if (retval)
                return retval;
 
-       retval = -EPERM;
        v9ses = v9fs_dentry2v9ses(dentry);
        if (iattr->ia_valid & ATTR_FILE) {
                fid = iattr->ia_file->private_data;
@@ -1158,7 +1152,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 
        inode->i_atime.tv_sec = stat->atime;
        inode->i_mtime.tv_sec = stat->mtime;
-       inode->i_ctime.tv_sec = stat->mtime;
+       inode_set_ctime(inode, stat->mtime, 0);
 
        inode->i_uid = v9ses->dfltuid;
        inode->i_gid = v9ses->dfltgid;
index 5361cd2..1312f68 100644 (file)
@@ -366,7 +366,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
        struct posix_acl *dacl = NULL, *pacl = NULL;
 
        p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
-       err = 0;
        v9ses = v9fs_inode2v9ses(dir);
 
        omode |= S_IFDIR;
@@ -451,7 +450,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
        p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        v9ses = v9fs_dentry2v9ses(dentry);
        if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
-               generic_fillattr(&nop_mnt_idmap, inode, stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
                return 0;
        } else if (v9ses->cache) {
                if (S_ISREG(inode->i_mode)) {
@@ -476,7 +475,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
                return PTR_ERR(st);
 
        v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
-       generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
        /* Change block size to what the server returned */
        stat->blksize = st->st_blksize;
 
@@ -646,8 +645,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
                inode->i_atime.tv_nsec = stat->st_atime_nsec;
                inode->i_mtime.tv_sec = stat->st_mtime_sec;
                inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
-               inode->i_ctime.tv_sec = stat->st_ctime_sec;
-               inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+               inode_set_ctime(inode, stat->st_ctime_sec,
+                               stat->st_ctime_nsec);
                inode->i_uid = stat->st_uid;
                inode->i_gid = stat->st_gid;
                set_nlink(inode, stat->st_nlink);
@@ -669,8 +668,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
                        inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
                }
                if (stat->st_result_mask & P9_STATS_CTIME) {
-                       inode->i_ctime.tv_sec = stat->st_ctime_sec;
-                       inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+                       inode_set_ctime(inode, stat->st_ctime_sec,
+                                       stat->st_ctime_nsec);
                }
                if (stat->st_result_mask & P9_STATS_UID)
                        inode->i_uid = stat->st_uid;
index 18d034e..7da21f5 100644 (file)
@@ -205,8 +205,8 @@ config TMPFS_XATTR
          Extended attributes are name:value pairs associated with inodes by
          the kernel or by users (see the attr(5) manual page for details).
 
-         Currently this enables support for the trusted.* and
-         security.* namespaces.
+         This enables support for the trusted.*, security.* and user.*
+         namespaces.
 
          You need this for POSIX ACL support on tmpfs.
 
@@ -233,6 +233,18 @@ config TMPFS_INODE64
 
          If unsure, say N.
 
+config TMPFS_QUOTA
+       bool "Tmpfs quota support"
+       depends on TMPFS
+       select QUOTA
+       help
+         Quota support allows setting per-user and per-group limits for
+         tmpfs usage.  Say Y to enable quota support. Once enabled, you can
+         control user and group quota enforcement with the quota, usrquota
+         and grpquota mount options.
+
+         If unsure, say N.
+
 config ARCH_SUPPORTS_HUGETLBFS
        def_bool n
 
index c3ac613..2096300 100644 (file)
@@ -270,7 +270,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
        inode->i_mode    = adfs_atts2mode(sb, inode);
        adfs_adfs2unix_time(&inode->i_mtime, inode);
        inode->i_atime = inode->i_mtime;
-       inode->i_ctime = inode->i_mtime;
+       inode_set_ctime_to_ts(inode, inode->i_mtime);
 
        if (S_ISDIR(inode->i_mode)) {
                inode->i_op     = &adfs_dir_inode_operations;
@@ -331,7 +331,7 @@ adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
        if (ia_valid & ATTR_ATIME)
                inode->i_atime = attr->ia_atime;
        if (ia_valid & ATTR_CTIME)
-               inode->i_ctime = attr->ia_ctime;
+               inode_set_ctime_to_ts(inode, attr->ia_ctime);
        if (ia_valid & ATTR_MODE) {
                ADFS_I(inode)->attr = adfs_mode2atts(sb, inode, attr->ia_mode);
                inode->i_mode = adfs_atts2mode(sb, inode);
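
From here on, most of the filesystem hunks are one mechanical conversion: stores to inode->i_ctime become calls into the new ctime accessors, putting all ctime updates behind one interface the VFS controls. The three forms, in a minimal illustrative helper:

#include <linux/fs.h>

static void ctime_accessor_examples(struct inode *inode,
                                    struct timespec64 ts,
                                    time64_t sec, long nsec)
{
        /* stamp "now"; the value is returned so mtime can share it */
        inode->i_mtime = inode_set_ctime_current(inode);

        /* copy an existing timespec64 */
        inode_set_ctime_to_ts(inode, ts);

        /* set from explicit seconds/nanoseconds */
        inode_set_ctime(inode, sec, nsec);
}
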
index 29f11e1..7ba93ef 100644 (file)
@@ -60,7 +60,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
        mark_buffer_dirty_inode(dir_bh, dir);
        affs_brelse(dir_bh);
 
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        inode_inc_iversion(dir);
        mark_inode_dirty(dir);
 
@@ -114,7 +114,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
 
        affs_brelse(bh);
 
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        inode_inc_iversion(dir);
        mark_inode_dirty(dir);
 
@@ -315,7 +315,7 @@ affs_remove_header(struct dentry *dentry)
        else
                clear_nlink(inode);
        affs_unlock_link(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
 done:
index e43f2f0..472e2bd 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <linux/uio.h>
 #include <linux/blkdev.h>
+#include <linux/mpage.h>
 #include "affs.h"
 
 static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -370,9 +371,10 @@ err_alloc:
        return -ENOSPC;
 }
 
-static int affs_writepage(struct page *page, struct writeback_control *wbc)
+static int affs_writepages(struct address_space *mapping,
+                          struct writeback_control *wbc)
 {
-       return block_write_full_page(page, affs_get_block, wbc);
+       return mpage_writepages(mapping, wbc, affs_get_block);
 }
 
 static int affs_read_folio(struct file *file, struct folio *folio)
@@ -456,10 +458,11 @@ const struct address_space_operations affs_aops = {
        .dirty_folio    = block_dirty_folio,
        .invalidate_folio = block_invalidate_folio,
        .read_folio = affs_read_folio,
-       .writepage = affs_writepage,
+       .writepages = affs_writepages,
        .write_begin = affs_write_begin,
        .write_end = affs_write_end,
        .direct_IO = affs_direct_IO,
+       .migrate_folio = buffer_migrate_folio,
        .bmap = _affs_bmap
 };
 
@@ -835,9 +838,10 @@ const struct address_space_operations affs_aops_ofs = {
        .dirty_folio    = block_dirty_folio,
        .invalidate_folio = block_invalidate_folio,
        .read_folio = affs_read_folio_ofs,
-       //.writepage = affs_writepage_ofs,
+       //.writepages = affs_writepages_ofs,
        .write_begin = affs_write_begin_ofs,
-       .write_end = affs_write_end_ofs
+       .write_end = affs_write_end_ofs,
+       .migrate_folio = filemap_migrate_folio,
 };
 
 /* Free any preallocated blocks. */
index 27f77a5..060746c 100644 (file)
@@ -149,13 +149,13 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
                break;
        }
 
-       inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec
-                      = (be32_to_cpu(tail->change.days) * 86400LL +
-                        be32_to_cpu(tail->change.mins) * 60 +
-                        be32_to_cpu(tail->change.ticks) / 50 +
-                        AFFS_EPOCH_DELTA) +
-                        sys_tz.tz_minuteswest * 60;
-       inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_atime.tv_nsec = 0;
+       inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+               inode_set_ctime(inode,
+                               (be32_to_cpu(tail->change.days) * 86400LL +
+                                be32_to_cpu(tail->change.mins) * 60 +
+                                be32_to_cpu(tail->change.ticks) / 50 + AFFS_EPOCH_DELTA)
+                               + sys_tz.tz_minuteswest * 60, 0).tv_sec;
+       inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
        affs_brelse(bh);
        unlock_new_inode(inode);
        return inode;
@@ -314,7 +314,7 @@ affs_new_inode(struct inode *dir)
        inode->i_gid     = current_fsgid();
        inode->i_ino     = block;
        set_nlink(inode, 1);
-       inode->i_mtime   = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime   = inode->i_atime = inode_set_ctime_current(inode);
        atomic_set(&AFFS_I(inode)->i_opencnt, 0);
        AFFS_I(inode)->i_blkcnt = 0;
        AFFS_I(inode)->i_lc = NULL;
index d12ccfd..2fe4a58 100644 (file)
@@ -43,7 +43,7 @@ affs_get_toupper(struct super_block *sb)
  * Note: the dentry argument is the parent dentry.
  */
 static inline int
-__affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t toupper, bool notruncate)
+__affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t fn, bool notruncate)
 {
        const u8 *name = qstr->name;
        unsigned long hash;
@@ -57,7 +57,7 @@ __affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t tou
        hash = init_name_hash(dentry);
        len = min(qstr->len, AFFSNAMEMAX);
        for (; len > 0; name++, len--)
-               hash = partial_name_hash(toupper(*name), hash);
+               hash = partial_name_hash(fn(*name), hash);
        qstr->hash = end_name_hash(hash);
 
        return 0;
@@ -80,7 +80,7 @@ affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
 }
 
 static inline int __affs_compare_dentry(unsigned int len,
-               const char *str, const struct qstr *name, toupper_t toupper,
+               const char *str, const struct qstr *name, toupper_t fn,
                bool notruncate)
 {
        const u8 *aname = str;
@@ -106,7 +106,7 @@ static inline int __affs_compare_dentry(unsigned int len,
                return 1;
 
        for (; len > 0; len--)
-               if (toupper(*aname++) != toupper(*bname++))
+               if (fn(*aname++) != fn(*bname++))
                        return 1;
 
        return 0;
@@ -135,7 +135,7 @@ affs_intl_compare_dentry(const struct dentry *dentry,
  */
 
 static inline int
-affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
+affs_match(struct dentry *dentry, const u8 *name2, toupper_t fn)
 {
        const u8 *name = dentry->d_name.name;
        int len = dentry->d_name.len;
@@ -148,7 +148,7 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
                return 0;
 
        for (name2++; len > 0; len--)
-               if (toupper(*name++) != toupper(*name2++))
+               if (fn(*name++) != fn(*name2++))
                        return 0;
        return 1;
 }
@@ -156,12 +156,12 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
 int
 affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len)
 {
-       toupper_t toupper = affs_get_toupper(sb);
+       toupper_t fn = affs_get_toupper(sb);
        u32 hash;
 
        hash = len = min(len, AFFSNAMEMAX);
        for (; len > 0; len--)
-               hash = (hash * 13 + toupper(*name++)) & 0x7ff;
+               hash = (hash * 13 + fn(*name++)) & 0x7ff;
 
        return hash % AFFS_SB(sb)->s_hashsize;
 }
@@ -171,7 +171,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
 {
        struct super_block *sb = dir->i_sb;
        struct buffer_head *bh;
-       toupper_t toupper = affs_get_toupper(sb);
+       toupper_t fn = affs_get_toupper(sb);
        u32 key;
 
        pr_debug("%s(\"%pd\")\n", __func__, dentry);
@@ -189,7 +189,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
                bh = affs_bread(sb, key);
                if (!bh)
                        return ERR_PTR(-EIO);
-               if (affs_match(dentry, AFFS_TAIL(sb, bh)->name, toupper))
+               if (affs_match(dentry, AFFS_TAIL(sb, bh)->name, fn))
                        return bh;
                key = be32_to_cpu(AFFS_TAIL(sb, bh)->hash_chain);
        }
index d7d9402..95bcbd7 100644 (file)
@@ -88,7 +88,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
        set_nlink(inode, 2);
        inode->i_uid            = GLOBAL_ROOT_UID;
        inode->i_gid            = GLOBAL_ROOT_GID;
-       inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_blocks         = 0;
        inode->i_generation     = 0;
 
index 866bab8..1c794a1 100644 (file)
@@ -90,7 +90,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
        vnode->status = *status;
 
        t = status->mtime_client;
-       inode->i_ctime = t;
+       inode_set_ctime_to_ts(inode, t);
        inode->i_mtime = t;
        inode->i_atime = t;
        inode->i_flags |= S_NOATIME;
@@ -206,7 +206,7 @@ static void afs_apply_status(struct afs_operation *op,
        t = status->mtime_client;
        inode->i_mtime = t;
        if (vp->update_ctime)
-               inode->i_ctime = op->ctime;
+               inode_set_ctime_to_ts(inode, op->ctime);
 
        if (vnode->status.data_version != status->data_version)
                data_changed = true;
@@ -252,7 +252,7 @@ static void afs_apply_status(struct afs_operation *op,
                vnode->netfs.remote_i_size = status->size;
                if (change_size) {
                        afs_set_i_size(vnode, status->size);
-                       inode->i_ctime = t;
+                       inode_set_ctime_to_ts(inode, t);
                        inode->i_atime = t;
                }
        }
@@ -773,7 +773,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
 
        do {
                read_seqbegin_or_lock(&vnode->cb_lock, &seq);
-               generic_fillattr(&nop_mnt_idmap, inode, stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
                if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
                    stat->nlink > 0)
                        stat->nlink -= 1;
index 77e3361..b3174da 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1447,13 +1447,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
        if (kiocb->ki_flags & IOCB_WRITE) {
                struct inode *inode = file_inode(kiocb->ki_filp);
 
-               /*
-                * Tell lockdep we inherited freeze protection from submission
-                * thread.
-                */
                if (S_ISREG(inode->i_mode))
-                       __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-               file_end_write(kiocb->ki_filp);
+                       kiocb_end_write(kiocb);
        }
 
        iocb->ki_res.res = res;
@@ -1581,17 +1576,8 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
                return ret;
        ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
        if (!ret) {
-               /*
-                * Open-code file_start_write here to grab freeze protection,
-                * which will be released by another thread in
-                * aio_complete_rw().  Fool lockdep by telling it the lock got
-                * released so that it doesn't complain about the held lock when
-                * we return to userspace.
-                */
-               if (S_ISREG(file_inode(file)->i_mode)) {
-                       sb_start_write(file_inode(file)->i_sb);
-                       __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
-               }
+               if (S_ISREG(file_inode(file)->i_mode))
+                       kiocb_start_write(req);
                req->ki_flags |= IOCB_WRITE;
                aio_rw_done(req, call_write_iter(file, req, &iter));
        }
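
Note: the two aio.c hunks fold the open-coded freeze-protection handoff
into the kiocb_start_write()/kiocb_end_write() helpers. Based on the
removed lines, the handoff they encapsulate is:

	/* Submission side: take freeze protection, then tell lockdep
	 * the lock got "released" so returning to userspace with it
	 * held does not trigger a complaint. */
	sb_start_write(file_inode(file)->i_sb);
	__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);

	/* Completion side (possibly another thread): tell lockdep we
	 * inherited the freeze protection, then drop it for real. */
	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
	file_end_write(kiocb->ki_filp);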
index d60dc1e..a8ae5f6 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -312,7 +312,7 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
        if (ia_valid & ATTR_MTIME)
                inode->i_mtime = attr->ia_mtime;
        if (ia_valid & ATTR_CTIME)
-               inode->i_ctime = attr->ia_ctime;
+               inode_set_ctime_to_ts(inode, attr->ia_ctime);
        if (ia_valid & ATTR_MODE) {
                umode_t mode = attr->ia_mode;
                if (!in_group_or_capable(idmap, inode,
@@ -394,9 +394,25 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
                return error;
 
        if ((ia_valid & ATTR_MODE)) {
-               umode_t amode = attr->ia_mode;
+               /*
+                * Don't allow changing the mode of symlinks:
+                *
+                * (1) The vfs doesn't take the mode of symlinks into account
+                *     during permission checking.
+                * (2) This has never worked correctly. Most major filesystems
+                *     did return EOPNOTSUPP due to interactions with POSIX ACLs
+                *     but still updated the mode of the symlink.
+                *     This inconsistency led system call wrapper providers
+                *     such as libc to already block changing the mode of
+                *     symlinks with EOPNOTSUPP.
+                * (3) Doing this in the first place would require specific
+                *     file descriptors and quite some effort.
+                */
+               if (S_ISLNK(inode->i_mode))
+                       return -EOPNOTSUPP;
+
                /* Flag setting protected by i_mutex */
-               if (is_sxid(amode))
+               if (is_sxid(attr->ia_mode))
                        inode->i_flags &= ~S_NOSEC;
        }
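
Note: from user space the new check shows up as a consistent failure
when changing a symlink's mode. An illustrative program (hypothetical
paths; glibc may report the error as ENOTSUP, which has the same value
as EOPNOTSUPP on Linux):

	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		if (symlink("/tmp/target", "/tmp/link") && errno != EEXIST)
			return 1;
		/* AT_SYMLINK_NOFOLLOW asks to chmod the link itself. */
		if (fchmodat(AT_FDCWD, "/tmp/link", 0644, AT_SYMLINK_NOFOLLOW))
			perror("fchmodat"); /* expect: Operation not supported */
		return 0;
	}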
 
index 3b3a6b1..54c12d9 100644 (file)
@@ -1,18 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
-config AUTOFS4_FS
-       tristate "Old Kconfig name for Kernel automounter support"
-       select AUTOFS_FS
-       help
-          This name exists for people to just automatically pick up the
-          new name of the autofs Kconfig option. All it does is select
-          the new option name.
-
-          It will go away in a release or two as people have
-          transitioned to just plain AUTOFS_FS.
-
 config AUTOFS_FS
        tristate "Kernel automounter support (supports v3, v4 and v5)"
-       default n
        help
           The automounter is a tool to automatically mount remote file systems
           on demand. This implementation is partially kernel-based to reduce
index affa703..2b49662 100644 (file)
@@ -370,7 +370,7 @@ struct inode *autofs_get_inode(struct super_block *sb, umode_t mode)
                inode->i_uid = d_inode(sb->s_root)->i_uid;
                inode->i_gid = d_inode(sb->s_root)->i_gid;
        }
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_ino = get_next_ino();
 
        if (S_ISDIR(mode)) {
index 93046c9..512b9a2 100644 (file)
@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
        p_ino = autofs_dentry_ino(dentry->d_parent);
        p_ino->count++;
 
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        return 0;
 }
@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
        d_inode(dentry)->i_size = 0;
        clear_nlink(d_inode(dentry));
 
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        spin_lock(&sbi->lookup_lock);
        __autofs_add_expiring(dentry);
@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap,
        p_ino = autofs_dentry_ino(dentry->d_parent);
        p_ino->count++;
        inc_nlink(dir);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        return 0;
 }
index 54c1f8b..33dd466 100644 (file)
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
                wq->status = -ENOENT; /* Magic is gone - report failure */
                kfree(wq->name.name - wq->offset);
                wq->name.name = NULL;
-               wq->wait_ctr--;
-               wake_up_interruptible(&wq->queue);
+               wake_up(&wq->queue);
+               if (!--wq->wait_ctr)
+                       kfree(wq);
                wq = nwq;
        }
        fput(sbi->pipe);        /* Close the pipe */
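
Note: the waitqueue fix above makes the waker wake sleepers while wq is
still valid, and frees the entry only when the reference count hits
zero. A hedged sketch of the waiter-side counterpart this pairs with
(names as in the hunk, body illustrative only):

	/* Each waiter holds a wait_ctr reference while sleeping; the
	 * last side to drop its reference frees the entry. */
	wait_event_killable(wq->queue, wq->name.name == NULL);
	status = wq->status;
	if (!--wq->wait_ctr)
		kfree(wq);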
index db64948..83f9566 100644 (file)
@@ -133,8 +133,7 @@ static int bad_inode_fiemap(struct inode *inode,
        return -EIO;
 }
 
-static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
-                                int flags)
+static int bad_inode_update_time(struct inode *inode, int flags)
 {
        return -EIO;
 }
@@ -209,8 +208,7 @@ void make_bad_inode(struct inode *inode)
        remove_inode_hash(inode);
 
        inode->i_mode = S_IFREG;
-       inode->i_atime = inode->i_mtime = inode->i_ctime =
-               current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_op = &bad_inode_ops;   
        inode->i_opflags &= ~IOP_XATTR;
        inode->i_fop = &bad_file_ops;   
index eee9237..9a16a51 100644 (file)
@@ -363,7 +363,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
        inode->i_mtime.tv_sec =
            fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16;
        inode->i_mtime.tv_nsec = 0;   /* lower 16 bits are not a time */
-       inode->i_ctime = inode->i_mtime;
+       inode_set_ctime_to_ts(inode, inode->i_mtime);
        inode->i_atime = inode->i_mtime;
 
        befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num);
index 040d514..12b8af0 100644 (file)
@@ -97,7 +97,7 @@ static int bfs_create(struct mnt_idmap *idmap, struct inode *dir,
        set_bit(ino, info->si_imap);
        info->si_freei--;
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_blocks = 0;
        inode->i_op = &bfs_file_inops;
        inode->i_fop = &bfs_file_operations;
@@ -158,7 +158,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
                return err;
        }
        inc_nlink(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        ihold(inode);
        d_instantiate(new, inode);
@@ -187,9 +187,9 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
        }
        de->ino = 0;
        mark_buffer_dirty_inode(bh, dir);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
        inode_dec_link_count(inode);
        error = 0;
 
@@ -240,10 +240,10 @@ static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                        goto end_rename;
        }
        old_de->ino = 0;
-       old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+       old_dir->i_mtime = inode_set_ctime_current(old_dir);
        mark_inode_dirty(old_dir);
        if (new_inode) {
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                inode_dec_link_count(new_inode);
        }
        mark_buffer_dirty_inode(old_bh, old_dir);
@@ -292,9 +292,9 @@ static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
                                pos = (block - sblock) * BFS_BSIZE + off;
                                if (pos >= dir->i_size) {
                                        dir->i_size += BFS_DIRENT_SIZE;
-                                       dir->i_ctime = current_time(dir);
+                                       inode_set_ctime_current(dir);
                                }
-                               dir->i_mtime = current_time(dir);
+                               dir->i_mtime = inode_set_ctime_current(dir);
                                mark_inode_dirty(dir);
                                de->ino = cpu_to_le16((u16)ino);
                                for (i = 0; i < BFS_NAMELEN; i++)
index 1926bec..e6a76ae 100644 (file)
@@ -82,10 +82,9 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
        inode->i_blocks = BFS_FILEBLOCKS(di);
        inode->i_atime.tv_sec =  le32_to_cpu(di->i_atime);
        inode->i_mtime.tv_sec =  le32_to_cpu(di->i_mtime);
-       inode->i_ctime.tv_sec =  le32_to_cpu(di->i_ctime);
+       inode_set_ctime(inode, le32_to_cpu(di->i_ctime), 0);
        inode->i_atime.tv_nsec = 0;
        inode->i_mtime.tv_nsec = 0;
-       inode->i_ctime.tv_nsec = 0;
 
        brelse(bh);
        unlock_new_inode(inode);
@@ -143,7 +142,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
        di->i_nlink = cpu_to_le32(inode->i_nlink);
        di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
        di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
-       di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+       di->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec);
        i_sblock = BFS_I(inode)->i_sblock;
        di->i_sblock = cpu_to_le32(i_sblock);
        di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
index bb202ad..e0108d1 100644 (file)
@@ -547,8 +547,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
        if (inode) {
                inode->i_ino = get_next_ino();
                inode->i_mode = mode;
-               inode->i_atime = inode->i_mtime = inode->i_ctime =
-                       current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        }
        return inode;
 }
index 66fa9ab..3282adc 100644 (file)
@@ -49,9 +49,11 @@ config BTRFS_FS_POSIX_ACL
          If you don't know what Access Control Lists are, say N
 
 config BTRFS_FS_CHECK_INTEGRITY
-       bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
+       bool "Btrfs with integrity check tool compiled in (DEPRECATED)"
        depends on BTRFS_FS
        help
+         This feature has been deprecated and will be removed in 6.7.
+
          Adds code that examines all block write requests (including
          writes of the super block). The goal is to verify that the
          state of the filesystem on disk is always consistent, i.e.,
index ceadfc5..8cfc821 100644 (file)
@@ -3,6 +3,8 @@
 #ifndef BTRFS_ACCESSORS_H
 #define BTRFS_ACCESSORS_H
 
+#include <linux/stddef.h>
+
 struct btrfs_map_token {
        struct extent_buffer *eb;
        char *kaddr;
@@ -34,13 +36,13 @@ static inline void put_unaligned_le8(u8 val, void *p)
        read_extent_buffer(eb, (char *)(result),                        \
                           ((unsigned long)(ptr)) +                     \
                            offsetof(type, member),                     \
-                          sizeof(((type *)0)->member)))
+                           sizeof_field(type, member)))
 
 #define write_eb_member(eb, ptr, type, member, result) (\
        write_extent_buffer(eb, (char *)(result),                       \
                           ((unsigned long)(ptr)) +                     \
                            offsetof(type, member),                     \
-                          sizeof(((type *)0)->member)))
+                           sizeof_field(type, member)))
 
 #define DECLARE_BTRFS_SETGET_BITS(bits)                                        \
 u##bits btrfs_get_token_##bits(struct btrfs_map_token *token,          \
@@ -62,25 +64,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
 static inline u##bits btrfs_##name(const struct extent_buffer *eb,     \
                                   const type *s)                       \
 {                                                                      \
-       static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);  \
+       static_assert(sizeof(u##bits) == sizeof_field(type, member));   \
        return btrfs_get_##bits(eb, s, offsetof(type, member));         \
 }                                                                      \
 static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
                                    u##bits val)                        \
 {                                                                      \
-       static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);  \
+       static_assert(sizeof(u##bits) == sizeof_field(type, member));   \
        btrfs_set_##bits(eb, s, offsetof(type, member), val);           \
 }                                                                      \
 static inline u##bits btrfs_token_##name(struct btrfs_map_token *token,        \
                                         const type *s)                 \
 {                                                                      \
-       static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);  \
+       static_assert(sizeof(u##bits) == sizeof_field(type, member));   \
        return btrfs_get_token_##bits(token, s, offsetof(type, member));\
 }                                                                      \
 static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
                                          type *s, u##bits val)         \
 {                                                                      \
-       static_assert(sizeof(u##bits) == sizeof(((type *)0))->member);  \
+       static_assert(sizeof(u##bits) == sizeof_field(type, member));   \
        btrfs_set_token_##bits(token, s, offsetof(type, member), val);  \
 }
 
@@ -111,17 +113,14 @@ static inline void btrfs_set_##name(type *s, u##bits val)         \
 static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
                                           struct btrfs_dev_item *s)
 {
-       static_assert(sizeof(u64) ==
-                     sizeof(((struct btrfs_dev_item *)0))->total_bytes);
-       return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
-                                           total_bytes));
+       static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
+       return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes));
 }
 static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
                                                struct btrfs_dev_item *s,
                                                u64 val)
 {
-       static_assert(sizeof(u64) ==
-                     sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+       static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
        WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
        btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
 }
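
Note: these accessors.h hunks replace the open-coded null-pointer idiom
with sizeof_field() from <linux/stddef.h> (hence the added include).
The macro is the same expression, just self-describing:

	/* As defined in <linux/stddef.h>: */
	#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))

	/* So the converted assert */
	static_assert(sizeof(u64) ==
		      sizeof_field(struct btrfs_dev_item, total_bytes));
	/* is the readable spelling of */
	static_assert(sizeof(u64) ==
		      sizeof(((struct btrfs_dev_item *)0)->total_bytes));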
index 79336fa..b7d54ef 100644 (file)
@@ -3373,7 +3373,6 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
                                struct btrfs_key *node_key,
                                struct btrfs_backref_node *cur)
 {
-       struct btrfs_fs_info *fs_info = cache->fs_info;
        struct btrfs_backref_edge *edge;
        struct btrfs_backref_node *exist;
        int ret;
@@ -3462,25 +3461,21 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
                        ret = handle_direct_tree_backref(cache, &key, cur);
                        if (ret < 0)
                                goto out;
-                       continue;
-               } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
-                       ret = -EINVAL;
-                       btrfs_print_v0_err(fs_info);
-                       btrfs_handle_fs_error(fs_info, ret, NULL);
-                       goto out;
-               } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
-                       continue;
+               } else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
+                       /*
+                        * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref
+                        * offset means the root objectid. We need to search
+                        * the tree to get its parent bytenr.
+                        */
+                       ret = handle_indirect_tree_backref(cache, path, &key, node_key,
+                                                          cur);
+                       if (ret < 0)
+                               goto out;
                }
-
                /*
-                * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
-                * means the root objectid. We need to search the tree to get
-                * its parent bytenr.
+                * Unrecognized tree backref items (if they pass the
+                * tree-checker) are ignored.
                 */
-               ret = handle_indirect_tree_backref(cache, path, &key, node_key,
-                                                  cur);
-               if (ret < 0)
-                       goto out;
        }
        ret = 0;
        cur->checked = 1;
index 48ae509..0cb1dee 100644 (file)
@@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
                                           u64 num_bytes)
 {
        struct btrfs_caching_control *caching_ctl;
+       int progress;
 
        caching_ctl = btrfs_get_caching_control(cache);
        if (!caching_ctl)
                return;
 
+       /*
+        * We've already failed to allocate from this block group, so even if
+        * there's enough space in the block group it isn't contiguous enough
+        * to allow for an allocation. Wait for at least the next wakeup tick,
+        * or for caching to finish.
+        */
+       progress = atomic_read(&caching_ctl->progress);
+
        wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
-                  (cache->free_space_ctl->free_space >= num_bytes));
+                  (progress != atomic_read(&caching_ctl->progress) &&
+                   (cache->free_space_ctl->free_space >= num_bytes)));
 
        btrfs_put_caching_control(caching_ctl);
 }
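
Note: the waiter snapshots the new progress counter before sleeping,
and the caching thread bumps it on every wakeup tick (the atomic_inc()
added further down), so the wait can only complete once caching has
finished or at least one more batch of free space has been exposed.
The two sides, condensed:

	/* Producer, per wakeup tick: */
	atomic_inc(&caching_ctl->progress);
	wake_up(&caching_ctl->wait);

	/* Consumer: */
	progress = atomic_read(&caching_ctl->progress);
	wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
		   (progress != atomic_read(&caching_ctl->progress) &&
		    cache->free_space_ctl->free_space >= num_bytes));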
@@ -494,33 +504,45 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
 #endif
 
 /*
- * This is only called by btrfs_cache_block_group, since we could have freed
- * extents we need to check the pinned_extents for any extents that can't be
- * used yet since their free space will be released as soon as the transaction
- * commits.
+ * Add a free space range to the in-memory free space cache of a block group.
+ * Super block locations within the range are detected and excluded from
+ * the free space cache.
+ *
+ * @block_group:      The target block group.
+ * @start:            Start offset of the range.
+ * @end:              End offset of the range (exclusive).
+ * @total_added_ret:  Optional pointer to return the total amount of space
+ *                    added to the block group's free space cache.
+ *
+ * Returns 0 on success or < 0 on error.
  */
-u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
+                            u64 end, u64 *total_added_ret)
 {
        struct btrfs_fs_info *info = block_group->fs_info;
-       u64 extent_start, extent_end, size, total_added = 0;
+       u64 extent_start, extent_end, size;
        int ret;
 
+       if (total_added_ret)
+               *total_added_ret = 0;
+
        while (start < end) {
-               ret = find_first_extent_bit(&info->excluded_extents, start,
-                                           &extent_start, &extent_end,
-                                           EXTENT_DIRTY | EXTENT_UPTODATE,
-                                           NULL);
-               if (ret)
+               if (!find_first_extent_bit(&info->excluded_extents, start,
+                                          &extent_start, &extent_end,
+                                          EXTENT_DIRTY | EXTENT_UPTODATE,
+                                          NULL))
                        break;
 
                if (extent_start <= start) {
                        start = extent_end + 1;
                } else if (extent_start > start && extent_start < end) {
                        size = extent_start - start;
-                       total_added += size;
                        ret = btrfs_add_free_space_async_trimmed(block_group,
                                                                 start, size);
-                       BUG_ON(ret); /* -ENOMEM or logic error */
+                       if (ret)
+                               return ret;
+                       if (total_added_ret)
+                               *total_added_ret += size;
                        start = extent_end + 1;
                } else {
                        break;
@@ -529,13 +551,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
 
        if (start < end) {
                size = end - start;
-               total_added += size;
                ret = btrfs_add_free_space_async_trimmed(block_group, start,
                                                         size);
-               BUG_ON(ret); /* -ENOMEM or logic error */
+               if (ret)
+                       return ret;
+               if (total_added_ret)
+                       *total_added_ret += size;
        }
 
-       return total_added;
+       return 0;
 }
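
Note: with the conversion from add_new_free_space() (which returned the
amount added and BUG_ON()ed on failure) to btrfs_add_new_free_space(),
callers receive errors and opt in to the total. The caller shape, as in
the caching-thread hunk below:

	u64 space_added;
	int ret;

	ret = btrfs_add_new_free_space(block_group, last, key.objectid,
				       &space_added);
	if (ret)
		goto out;	/* e.g. -ENOMEM, no more BUG_ON() */
	total_found += space_added;

	/* Pass NULL when the amount added is not needed: */
	ret = btrfs_add_new_free_space(cache, cache->start,
				       cache->start + cache->length, NULL);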
 
 /*
@@ -779,8 +803,13 @@ next:
 
                if (key.type == BTRFS_EXTENT_ITEM_KEY ||
                    key.type == BTRFS_METADATA_ITEM_KEY) {
-                       total_found += add_new_free_space(block_group, last,
-                                                         key.objectid);
+                       u64 space_added;
+
+                       ret = btrfs_add_new_free_space(block_group, last,
+                                                      key.objectid, &space_added);
+                       if (ret)
+                               goto out;
+                       total_found += space_added;
                        if (key.type == BTRFS_METADATA_ITEM_KEY)
                                last = key.objectid +
                                        fs_info->nodesize;
@@ -789,22 +818,29 @@ next:
 
                        if (total_found > CACHING_CTL_WAKE_UP) {
                                total_found = 0;
-                               if (wakeup)
+                               if (wakeup) {
+                                       atomic_inc(&caching_ctl->progress);
                                        wake_up(&caching_ctl->wait);
+                               }
                        }
                }
                path->slots[0]++;
        }
-       ret = 0;
-
-       total_found += add_new_free_space(block_group, last,
-                               block_group->start + block_group->length);
 
+       ret = btrfs_add_new_free_space(block_group, last,
+                                      block_group->start + block_group->length,
+                                      NULL);
 out:
        btrfs_free_path(path);
        return ret;
 }
 
+static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
+{
+       clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
+                         bg->start + bg->length - 1, EXTENT_UPTODATE);
+}
+
 static noinline void caching_thread(struct btrfs_work *work)
 {
        struct btrfs_block_group *block_group;
@@ -898,6 +934,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
        init_waitqueue_head(&caching_ctl->wait);
        caching_ctl->block_group = cache;
        refcount_set(&caching_ctl->count, 2);
+       atomic_set(&caching_ctl->progress, 0);
        btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
 
        spin_lock(&cache->lock);
@@ -1640,13 +1677,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
 {
        struct btrfs_fs_info *fs_info = bg->fs_info;
 
-       trace_btrfs_add_unused_block_group(bg);
        spin_lock(&fs_info->unused_bgs_lock);
        if (list_empty(&bg->bg_list)) {
                btrfs_get_block_group(bg);
+               trace_btrfs_add_unused_block_group(bg);
                list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
-       } else {
+       } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
                /* Pull out the block group from the reclaim_bgs list. */
+               trace_btrfs_add_unused_block_group(bg);
                list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
        }
        spin_unlock(&fs_info->unused_bgs_lock);
@@ -2072,8 +2110,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
        if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
                stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
                cache->bytes_super += stripe_len;
-               ret = btrfs_add_excluded_extent(fs_info, cache->start,
-                                               stripe_len);
+               ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
+                                    cache->start + stripe_len - 1,
+                                    EXTENT_UPTODATE, NULL);
                if (ret)
                        return ret;
        }
@@ -2087,6 +2126,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
 
                /* Shouldn't have super stripes in sequential zones */
                if (zoned && nr) {
+                       kfree(logical);
                        btrfs_err(fs_info,
                        "zoned: block group %llu must not contain super block",
                                  cache->start);
@@ -2098,8 +2138,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
                                cache->start + cache->length - logical[nr]);
 
                        cache->bytes_super += len;
-                       ret = btrfs_add_excluded_extent(fs_info, logical[nr],
-                                                       len);
+                       ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
+                                            logical[nr] + len - 1,
+                                            EXTENT_UPTODATE, NULL);
                        if (ret) {
                                kfree(logical);
                                return ret;
@@ -2292,9 +2333,11 @@ static int read_one_block_group(struct btrfs_fs_info *info,
                btrfs_free_excluded_extents(cache);
        } else if (cache->used == 0) {
                cache->cached = BTRFS_CACHE_FINISHED;
-               add_new_free_space(cache, cache->start,
-                                  cache->start + cache->length);
+               ret = btrfs_add_new_free_space(cache, cache->start,
+                                              cache->start + cache->length, NULL);
                btrfs_free_excluded_extents(cache);
+               if (ret)
+                       goto error;
        }
 
        ret = btrfs_add_block_group_cache(info, cache);
@@ -2668,6 +2711,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 next:
                btrfs_delayed_refs_rsv_release(fs_info, 1);
                list_del_init(&block_group->bg_list);
+               clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
        }
        btrfs_trans_release_chunk_metadata(trans);
 }
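
Note: BLOCK_GROUP_FLAG_NEW closes a race where a block group still on
the transaction's list of new block groups could be moved onto the
unused list. Its lifecycle across the hunks above and below:

	/* Creation: set before the group is visible to other tasks. */
	set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);

	/* Transaction commit: the group leaves bg_list and is now
	 * eligible for unused/reclaim handling. */
	clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);

	/* btrfs_mark_bg_unused(): only reshuffle non-new groups. */
	if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags))
		list_move_tail(&bg->bg_list, &fs_info->unused_bgs);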
@@ -2707,6 +2751,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
        if (!cache)
                return ERR_PTR(-ENOMEM);
 
+       /*
+        * Mark it as new before adding it to the rbtree of block groups or any
+        * list, so that no other task finds it and calls btrfs_mark_bg_unused()
+        * before the new flag is set.
+        */
+       set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
+
        cache->length = size;
        set_free_space_tree_thresholds(cache);
        cache->flags = type;
@@ -2730,9 +2781,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
                return ERR_PTR(ret);
        }
 
-       add_new_free_space(cache, chunk_offset, chunk_offset + size);
-
+       ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
        btrfs_free_excluded_extents(cache);
+       if (ret) {
+               btrfs_put_block_group(cache);
+               return ERR_PTR(ret);
+       }
 
        /*
         * Ensure the corresponding space_info object is created and
@@ -4035,7 +4089,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
 
        if (IS_ERR(ret_bg)) {
                ret = PTR_ERR(ret_bg);
-       } else if (from_extent_allocation) {
+       } else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) {
                /*
                 * New block group is likely to be used soon. Try to activate
                 * it now. Failure is OK for now.
@@ -4233,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        struct btrfs_caching_control *caching_ctl;
        struct rb_node *n;
 
+       if (btrfs_is_zoned(info)) {
+               if (info->active_meta_bg) {
+                       btrfs_put_block_group(info->active_meta_bg);
+                       info->active_meta_bg = NULL;
+               }
+               if (info->active_system_bg) {
+                       btrfs_put_block_group(info->active_system_bg);
+                       info->active_system_bg = NULL;
+               }
+       }
+
        write_lock(&info->block_group_cache_lock);
        while (!list_empty(&info->caching_block_groups)) {
                caching_ctl = list_entry(info->caching_block_groups.next,
index f204add..2bdbcb8 100644 (file)
@@ -70,6 +70,11 @@ enum btrfs_block_group_flags {
        BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
        /* Indicate that the block group is placed on a sequential zone */
        BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
+       /*
+        * Indicate that block group is in the list of new block groups of a
+        * transaction.
+        */
+       BLOCK_GROUP_FLAG_NEW,
 };
 
 enum btrfs_caching_type {
@@ -85,6 +90,8 @@ struct btrfs_caching_control {
        wait_queue_head_t wait;
        struct btrfs_work work;
        struct btrfs_block_group *block_group;
+       /* Track progress of caching during allocation. */
+       atomic_t progress;
        refcount_t count;
 };
 
@@ -284,8 +291,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
 void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
 struct btrfs_caching_control *btrfs_get_caching_control(
                struct btrfs_block_group *cache);
-u64 add_new_free_space(struct btrfs_block_group *block_group,
-                      u64 start, u64 end);
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
+                            u64 start, u64 end, u64 *total_added_ret);
 struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
                                struct btrfs_fs_info *fs_info,
                                const u64 chunk_offset);
index 6279d20..77684c5 100644 (file)
@@ -349,6 +349,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
        }
        read_unlock(&fs_info->global_root_lock);
 
+       if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
+               num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item);
+               min_items++;
+       }
+
        /*
         * But we also want to reserve enough space so we can do the fallback
         * global reserve for an unlink, which is an additional
index d47a927..bda1fdb 100644 (file)
@@ -498,12 +498,8 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
                                    u64 start, u64 num_bytes, u64 min_size,
                                    loff_t actual_len, u64 *alloc_hint);
 int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
-                            u64 start, u64 end, int *page_started,
-                            unsigned long *nr_written, struct writeback_control *wbc);
+                            u64 start, u64 end, struct writeback_control *wbc);
 int btrfs_writepage_cow_fixup(struct page *page);
-void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
-                                         struct page *page, u64 start,
-                                         u64 end, bool uptodate);
 int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
                                             int compress_type);
 int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
index f2d2b31..9419f4e 100644 (file)
@@ -443,6 +443,7 @@ struct btrfs_drop_extents_args {
 
 struct btrfs_file_private {
        void *filldir_buf;
+       u64 last_index;
        struct extent_state *llseek_cached_state;
 };
 
index 6b457b0..53c1211 100644 (file)
@@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
 }
 
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+                                    u64 last_index,
                                     struct list_head *ins_list,
                                     struct list_head *del_list)
 {
@@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
 
        mutex_lock(&delayed_node->mutex);
        item = __btrfs_first_delayed_insertion_item(delayed_node);
-       while (item) {
+       while (item && item->index <= last_index) {
                refcount_inc(&item->refs);
                list_add_tail(&item->readdir_list, ins_list);
                item = __btrfs_next_delayed_item(item);
        }
 
        item = __btrfs_first_delayed_deletion_item(delayed_node);
-       while (item) {
+       while (item && item->index <= last_index) {
                refcount_inc(&item->refs);
                list_add_tail(&item->readdir_list, del_list);
                item = __btrfs_next_delayed_item(item);
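
Note: bounding both delayed-item lists by last_index keeps readdir from
returning entries whose index lies beyond the position it committed to,
which is what the new last_index parameter (and the last_index field
added to btrfs_file_private above) exists for. A hedged sketch of a
caller, with the snapshot step named hypothetically:

	/* Snapshot the highest directory index this pass will emit,
	 * then cap the delayed insertion/deletion lists by it. */
	private->last_index = snapshot_dir_last_index(inode);
	btrfs_readdir_get_delayed_items(inode, private->last_index,
					&ins_list, &del_list);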
@@ -1735,9 +1736,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
        int over = 0;
        unsigned char d_type;
 
-       if (list_empty(ins_list))
-               return 0;
-
        /*
         * Changing the data of the delayed item is impossible. So
         * we needn't lock them. And we have held i_mutex of the
@@ -1808,9 +1806,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
                                      inode->i_mtime.tv_nsec);
 
        btrfs_set_stack_timespec_sec(&inode_item->ctime,
-                                    inode->i_ctime.tv_sec);
+                                    inode_get_ctime(inode).tv_sec);
        btrfs_set_stack_timespec_nsec(&inode_item->ctime,
-                                     inode->i_ctime.tv_nsec);
+                                     inode_get_ctime(inode).tv_nsec);
 
        btrfs_set_stack_timespec_sec(&inode_item->otime,
                                     BTRFS_I(inode)->i_otime.tv_sec);
@@ -1861,8 +1859,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
        inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
        inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
 
-       inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
-       inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
+       inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime),
+                       btrfs_stack_timespec_nsec(&inode_item->ctime));
 
        BTRFS_I(inode)->i_otime.tv_sec =
                btrfs_stack_timespec_sec(&inode_item->otime);
index 4f21daa..dc1085b 100644 (file)
@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
 
 /* Used for readdir() */
 bool btrfs_readdir_get_delayed_items(struct inode *inode,
+                                    u64 last_index,
                                     struct list_head *ins_list,
                                     struct list_head *del_list);
 void btrfs_readdir_put_delayed_items(struct inode *inode,
index 5f10965..fff22ed 100644 (file)
@@ -792,9 +792,9 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
 
        lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
 
-       while (!find_first_extent_bit(&srcdev->alloc_state, start,
-                                     &found_start, &found_end,
-                                     CHUNK_ALLOCATED, &cached_state)) {
+       while (find_first_extent_bit(&srcdev->alloc_state, start,
+                                    &found_start, &found_end,
+                                    CHUNK_ALLOCATED, &cached_state)) {
                ret = set_extent_bit(&tgtdev->alloc_state, found_start,
                                     found_end, CHUNK_ALLOCATED, NULL);
                if (ret)
index 7513388..0a96ea8 100644 (file)
@@ -313,21 +313,16 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
        struct btrfs_fs_info *fs_info = eb->fs_info;
        struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
        u8 fsid[BTRFS_FSID_SIZE];
-       u8 *metadata_uuid;
 
        read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
                           BTRFS_FSID_SIZE);
+
        /*
-        * Checking the incompat flag is only valid for the current fs. For
-        * seed devices it's forbidden to have their uuid changed so reading
-        * ->fsid in this case is fine
+        * alloc_fs_devices() copies the fsid into metadata_uuid if the
+        * metadata_uuid is unset in the superblock, including for a seed device.
+        * So, we can use fs_devices->metadata_uuid.
         */
-       if (btrfs_fs_incompat(fs_info, METADATA_UUID))
-               metadata_uuid = fs_devices->metadata_uuid;
-       else
-               metadata_uuid = fs_devices->fsid;
-
-       if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
+       if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0)
                return false;
 
        list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
@@ -1103,7 +1098,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
        btrfs_drew_lock_init(&root->snapshot_lock);
 
        if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
-           !btrfs_is_data_reloc_root(root)) {
+           !btrfs_is_data_reloc_root(root) &&
+           is_fstree(root->root_key.objectid)) {
                set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
                btrfs_check_and_init_root_item(&root->root_item);
        }
@@ -1300,6 +1296,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
        root = btrfs_get_global_root(fs_info, objectid);
        if (root)
                return root;
+
+       /*
+        * If we're called for non-subvolume trees, and the above function
+        * didn't find one, do not try to read it from disk.
+        *
+        * This is namely for free-space-tree and quota tree, which can change
+        * at runtime and should only be grabbed from fs_info.
+        */
+       if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+               return ERR_PTR(-ENOENT);
 again:
        root = btrfs_lookup_fs_root(fs_info, objectid);
        if (root) {
@@ -2373,21 +2379,18 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
                ret = -EINVAL;
        }
 
-       if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
-                  BTRFS_FSID_SIZE)) {
+       if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
                btrfs_err(fs_info,
                "superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
-                       fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
+                         sb->fsid, fs_info->fs_devices->fsid);
                ret = -EINVAL;
        }
 
-       if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
-           memcmp(fs_info->fs_devices->metadata_uuid,
-                  fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
+       if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb),
+                  BTRFS_FSID_SIZE) != 0) {
                btrfs_err(fs_info,
 "superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
-                       fs_info->super_copy->metadata_uuid,
-                       fs_info->fs_devices->metadata_uuid);
+                         btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid);
                ret = -EINVAL;
        }
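
Note: the validation now funnels through btrfs_sb_fsid_ptr() instead of
open-coding the METADATA_UUID incompat check in each caller. A sketch of
what such a helper boils down to (illustrative; the real helper lives
elsewhere in the btrfs sources):

	u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
	{
		bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
					  BTRFS_FEATURE_INCOMPAT_METADATA_UUID);

		return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
	}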
 
@@ -2858,6 +2861,56 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
        return 0;
 }
 
+static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
+{
+       u64 root_objectid = 0;
+       struct btrfs_root *gang[8];
+       int i = 0;
+       int err = 0;
+       unsigned int ret = 0;
+
+       while (1) {
+               spin_lock(&fs_info->fs_roots_radix_lock);
+               ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+                                            (void **)gang, root_objectid,
+                                            ARRAY_SIZE(gang));
+               if (!ret) {
+                       spin_unlock(&fs_info->fs_roots_radix_lock);
+                       break;
+               }
+               root_objectid = gang[ret - 1]->root_key.objectid + 1;
+
+               for (i = 0; i < ret; i++) {
+                       /* Avoid grabbing roots in dead_roots. */
+                       if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+                               gang[i] = NULL;
+                               continue;
+                       }
+                       /* Grab all the search results for later use. */
+                       gang[i] = btrfs_grab_root(gang[i]);
+               }
+               spin_unlock(&fs_info->fs_roots_radix_lock);
+
+               for (i = 0; i < ret; i++) {
+                       if (!gang[i])
+                               continue;
+                       root_objectid = gang[i]->root_key.objectid;
+                       err = btrfs_orphan_cleanup(gang[i]);
+                       if (err)
+                               goto out;
+                       btrfs_put_root(gang[i]);
+               }
+               root_objectid++;
+       }
+out:
+       /* Release the uncleaned roots due to error. */
+       for (; i < ret; i++) {
+               if (gang[i])
+                       btrfs_put_root(gang[i]);
+       }
+       return err;
+}
+
 /*
  * Some options only have meaning at mount time and shouldn't persist across
  * remounts, or be displayed. Clear these at the end of mount and remount
@@ -3211,7 +3264,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
        /* check FS state, whether FS is broken. */
        if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
-               set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+               WRITE_ONCE(fs_info->fs_error, -EUCLEAN);
 
        /*
         * In the long term, we'll store the compression type in the super
@@ -3406,6 +3459,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 
        btrfs_free_zone_cache(fs_info);
 
+       btrfs_check_active_zone_reservation(fs_info);
+
        if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
            !btrfs_check_rw_degradable(fs_info, NULL)) {
                btrfs_warn(fs_info,
@@ -3438,11 +3493,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
         * For devices supporting discard turn on discard=async automatically,
         * unless it's already set or disabled. This could be turned off by
         * nodiscard for the same mount.
+        *
+        * The zoned mode piggybacks on the discard functionality for
+        * resetting a zone. There is no reason to delay the zone reset as it is
+        * fast enough. So, do not enable async discard for zoned mode.
         */
        if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
              btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
              btrfs_test_opt(fs_info, NODISCARD)) &&
-           fs_info->fs_devices->discardable) {
+           fs_info->fs_devices->discardable &&
+           !btrfs_is_zoned(fs_info)) {
                btrfs_set_and_info(fs_info, DISCARD_ASYNC,
                                   "auto enabling async discard");
        }
@@ -4120,56 +4180,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
                btrfs_put_root(root);
 }
 
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
-{
-       u64 root_objectid = 0;
-       struct btrfs_root *gang[8];
-       int i = 0;
-       int err = 0;
-       unsigned int ret = 0;
-
-       while (1) {
-               spin_lock(&fs_info->fs_roots_radix_lock);
-               ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
-                                            (void **)gang, root_objectid,
-                                            ARRAY_SIZE(gang));
-               if (!ret) {
-                       spin_unlock(&fs_info->fs_roots_radix_lock);
-                       break;
-               }
-               root_objectid = gang[ret - 1]->root_key.objectid + 1;
-
-               for (i = 0; i < ret; i++) {
-                       /* Avoid to grab roots in dead_roots */
-                       if (btrfs_root_refs(&gang[i]->root_item) == 0) {
-                               gang[i] = NULL;
-                               continue;
-                       }
-                       /* grab all the search result for later use */
-                       gang[i] = btrfs_grab_root(gang[i]);
-               }
-               spin_unlock(&fs_info->fs_roots_radix_lock);
-
-               for (i = 0; i < ret; i++) {
-                       if (!gang[i])
-                               continue;
-                       root_objectid = gang[i]->root_key.objectid;
-                       err = btrfs_orphan_cleanup(gang[i]);
-                       if (err)
-                               goto out;
-                       btrfs_put_root(gang[i]);
-               }
-               root_objectid++;
-       }
-out:
-       /* release the uncleaned roots due to error */
-       for (; i < ret; i++) {
-               if (gang[i])
-                       btrfs_put_root(gang[i]);
-       }
-       return err;
-}
-
 int btrfs_commit_super(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *root = fs_info->tree_root;
@@ -4212,7 +4222,7 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
                u64 found_end;
 
                found = true;
-               while (!find_first_extent_bit(&trans->dirty_pages, cur,
+               while (find_first_extent_bit(&trans->dirty_pages, cur,
                        &found_start, &found_end, EXTENT_DIRTY, &cached)) {
                        dirty_bytes += found_end + 1 - found_start;
                        cur = found_end + 1;
@@ -4536,9 +4546,7 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
 static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *root;
-       struct list_head splice;
-
-       INIT_LIST_HEAD(&splice);
+       LIST_HEAD(splice);
 
        spin_lock(&fs_info->ordered_root_lock);
        list_splice_init(&fs_info->ordered_roots, &splice);
@@ -4644,9 +4652,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 {
        struct btrfs_inode *btrfs_inode;
-       struct list_head splice;
-
-       INIT_LIST_HEAD(&splice);
+       LIST_HEAD(splice);
 
        spin_lock(&root->delalloc_lock);
        list_splice_init(&root->delalloc_inodes, &splice);
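
Note: several hunks here swap the two-step on-stack initialization for
the LIST_HEAD() declaration macro, which declares and initializes the
head in one statement:

	/* Before: */
	struct list_head splice;

	INIT_LIST_HEAD(&splice);

	/* After (from <linux/list.h>): */
	LIST_HEAD(splice);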
@@ -4679,9 +4685,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
 static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
 {
        struct btrfs_root *root;
-       struct list_head splice;
-
-       INIT_LIST_HEAD(&splice);
+       LIST_HEAD(splice);
 
        spin_lock(&fs_info->delalloc_root_lock);
        list_splice_init(&fs_info->delalloc_roots, &splice);
@@ -4700,21 +4704,16 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
        spin_unlock(&fs_info->delalloc_root_lock);
 }
 
-static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
-                                       struct extent_io_tree *dirty_pages,
-                                       int mark)
+static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
+                                        struct extent_io_tree *dirty_pages,
+                                        int mark)
 {
-       int ret;
        struct extent_buffer *eb;
        u64 start = 0;
        u64 end;
 
-       while (1) {
-               ret = find_first_extent_bit(dirty_pages, start, &start, &end,
-                                           mark, NULL);
-               if (ret)
-                       break;
-
+       while (find_first_extent_bit(dirty_pages, start, &start, &end,
+                                    mark, NULL)) {
                clear_extent_bits(dirty_pages, start, end, mark);
                while (start <= end) {
                        eb = find_extent_buffer(fs_info, start);
@@ -4730,16 +4729,13 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
                        free_extent_buffer_stale(eb);
                }
        }
-
-       return ret;
 }
 
-static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
-                                      struct extent_io_tree *unpin)
+static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
+                                       struct extent_io_tree *unpin)
 {
        u64 start;
        u64 end;
-       int ret;
 
        while (1) {
                struct extent_state *cached_state = NULL;
@@ -4751,9 +4747,8 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
                 * the same extent range.
                 */
                mutex_lock(&fs_info->unused_bg_unpin_mutex);
-               ret = find_first_extent_bit(unpin, 0, &start, &end,
-                                           EXTENT_DIRTY, &cached_state);
-               if (ret) {
+               if (!find_first_extent_bit(unpin, 0, &start, &end,
+                                          EXTENT_DIRTY, &cached_state)) {
                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        break;
                }
@@ -4764,8 +4759,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
                mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                cond_resched();
        }
-
-       return 0;
 }
 
 static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
index b03767f..02b6457 100644 (file)
@@ -77,7 +77,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
 struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
 
 void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
 void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
 void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
index a2315a4..ff8e117 100644 (file)
@@ -831,15 +831,15 @@ static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *t
  *
  * Note: If there are multiple bits set in @bits, any of them will match.
  *
- * Return 0 if we find something, and update @start_ret and @end_ret.
- * Return 1 if we found nothing.
+ * Return true if we find something, and update @start_ret and @end_ret.
+ * Return false if we found nothing.
  */
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, u32 bits,
-                         struct extent_state **cached_state)
+bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+                          u64 *start_ret, u64 *end_ret, u32 bits,
+                          struct extent_state **cached_state)
 {
        struct extent_state *state;
-       int ret = 1;
+       bool ret = false;
 
        spin_lock(&tree->lock);
        if (cached_state && *cached_state) {
@@ -863,7 +863,7 @@ got_it:
                cache_state_if_flags(state, cached_state, 0);
                *start_ret = state->start;
                *end_ret = state->end;
-               ret = 0;
+               ret = true;
        }
 out:
        spin_unlock(&tree->lock);
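
Note: with find_first_extent_bit() now returning bool (true when a
range was found), call sites flip their tests, as the dev-replace and
disk-io hunks above already show. The caller shape after conversion:

	u64 start = 0, found_start, found_end;
	struct extent_state *cached = NULL;

	while (find_first_extent_bit(tree, start, &found_start,
				     &found_end, EXTENT_DIRTY, &cached)) {
		/* process [found_start, found_end] */
		start = found_end + 1;
	}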
index fbd3b27..28c23a2 100644 (file)
@@ -182,9 +182,9 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
                       u32 bits, u32 clear_bits,
                       struct extent_state **cached_state);
 
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-                         u64 *start_ret, u64 *end_ret, u32 bits,
-                         struct extent_state **cached_state);
+bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+                          u64 *start_ret, u64 *end_ret, u32 bits,
+                          struct extent_state **cached_state);
 void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
                                 u64 *start_ret, u64 *end_ret, u32 bits);
 int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
index 911908e..f356f08 100644 (file)
@@ -69,27 +69,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
        return (cache->flags & bits) == bits;
 }
 
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
-                             u64 start, u64 num_bytes)
-{
-       u64 end = start + num_bytes - 1;
-       set_extent_bit(&fs_info->excluded_extents, start, end,
-                      EXTENT_UPTODATE, NULL);
-       return 0;
-}
-
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
-{
-       struct btrfs_fs_info *fs_info = cache->fs_info;
-       u64 start, end;
-
-       start = cache->start;
-       end = start + cache->length - 1;
-
-       clear_extent_bits(&fs_info->excluded_extents, start, end,
-                         EXTENT_UPTODATE);
-}
-
 /* simple helper to search for an existing data extent at a given offset */
 int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
 {
@@ -187,8 +166,10 @@ search_again:
                        num_refs = btrfs_extent_refs(leaf, ei);
                        extent_flags = btrfs_extent_flags(leaf, ei);
                } else {
-                       ret = -EINVAL;
-                       btrfs_print_v0_err(fs_info);
+                       ret = -EUCLEAN;
+                       btrfs_err(fs_info,
+                       "unexpected extent item size, has %u expect >= %zu",
+                                 item_size, sizeof(*ei));
                        if (trans)
                                btrfs_abort_transaction(trans, ret);
                        else
@@ -402,11 +383,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
                }
        }
 
+       WARN_ON(1);
        btrfs_print_leaf(eb);
        btrfs_err(eb->fs_info,
                  "eb %llu iref 0x%lx invalid extent inline ref type %d",
                  eb->start, (unsigned long)iref, type);
-       WARN_ON(1);
 
        return BTRFS_REF_TYPE_INVALID;
 }
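
The caller-side contract this enables (mirroring the update_inline_extent_backref() hunk further down): the helper has already printed the leaf and an error message, so callers can bail out with a bare check:

	type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
	if (unlikely(type == BTRFS_REF_TYPE_INVALID))
		return -EUCLEAN;	/* details were already logged */
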
@@ -624,12 +605,12 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
                ref2 = btrfs_item_ptr(leaf, path->slots[0],
                                      struct btrfs_shared_data_ref);
                num_refs = btrfs_shared_data_ref_count(leaf, ref2);
-       } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
-               btrfs_print_v0_err(trans->fs_info);
-               btrfs_abort_transaction(trans, -EINVAL);
-               return -EINVAL;
        } else {
-               BUG();
+               btrfs_err(trans->fs_info,
+                         "unrecognized backref key (%llu %u %llu)",
+                         key.objectid, key.type, key.offset);
+               btrfs_abort_transaction(trans, -EUCLEAN);
+               return -EUCLEAN;
        }
 
        BUG_ON(num_refs < refs_to_drop);
@@ -660,7 +641,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
        leaf = path->nodes[0];
        btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
 
-       BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
        if (iref) {
                /*
                 * If type is invalid, we should have bailed out earlier than
@@ -869,6 +849,11 @@ again:
                err = -ENOENT;
                goto out;
        } else if (WARN_ON(ret)) {
+               btrfs_print_leaf(path->nodes[0]);
+               btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+                         bytenr, num_bytes, parent, root_objectid, owner,
+                         offset);
                err = -EIO;
                goto out;
        }
@@ -876,8 +861,10 @@ again:
        leaf = path->nodes[0];
        item_size = btrfs_item_size(leaf, path->slots[0]);
        if (unlikely(item_size < sizeof(*ei))) {
-               err = -EINVAL;
-               btrfs_print_v0_err(fs_info);
+               err = -EUCLEAN;
+               btrfs_err(fs_info,
+                         "unexpected extent item size, has %llu expect >= %zu",
+                         item_size, sizeof(*ei));
                btrfs_abort_transaction(trans, err);
                goto out;
        }
@@ -1079,13 +1066,13 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
 /*
  * helper to update/remove inline back ref
  */
-static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_path *path,
+static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *path,
                                  struct btrfs_extent_inline_ref *iref,
                                  int refs_to_mod,
                                  struct btrfs_delayed_extent_op *extent_op)
 {
        struct extent_buffer *leaf = path->nodes[0];
+       struct btrfs_fs_info *fs_info = leaf->fs_info;
        struct btrfs_extent_item *ei;
        struct btrfs_extent_data_ref *dref = NULL;
        struct btrfs_shared_data_ref *sref = NULL;
@@ -1098,18 +1085,33 @@ void update_inline_extent_backref(struct btrfs_path *path,
 
        ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
        refs = btrfs_extent_refs(leaf, ei);
-       WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+       if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
+               struct btrfs_key key;
+               u32 extent_size;
+
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               if (key.type == BTRFS_METADATA_ITEM_KEY)
+                       extent_size = fs_info->nodesize;
+               else
+                       extent_size = key.offset;
+               btrfs_print_leaf(leaf);
+               btrfs_err(fs_info,
+       "invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
+                         key.objectid, extent_size, refs_to_mod, refs);
+               return -EUCLEAN;
+       }
        refs += refs_to_mod;
        btrfs_set_extent_refs(leaf, ei, refs);
        if (extent_op)
                __run_delayed_extent_op(extent_op, leaf, ei);
 
+       type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
        /*
-        * If type is invalid, we should have bailed out after
-        * lookup_inline_extent_backref().
+        * Function btrfs_get_extent_inline_ref_type() has already printed
+        * error messages.
         */
-       type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
-       ASSERT(type != BTRFS_REF_TYPE_INVALID);
+       if (unlikely(type == BTRFS_REF_TYPE_INVALID))
+               return -EUCLEAN;
 
        if (type == BTRFS_EXTENT_DATA_REF_KEY) {
                dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -1119,10 +1121,43 @@ void update_inline_extent_backref(struct btrfs_path *path,
                refs = btrfs_shared_data_ref_count(leaf, sref);
        } else {
                refs = 1;
-               BUG_ON(refs_to_mod != -1);
+               /*
+                * For tree blocks we can only drop one ref, and tree
+                * blocks should not have refs > 1.
+                *
+                * Furthermore if we're inserting a new inline backref, we
+                * won't reach this path either. That would be
+                * setup_inline_extent_backref().
+                */
+               if (unlikely(refs_to_mod != -1)) {
+                       struct btrfs_key key;
+
+                       btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+                       btrfs_print_leaf(leaf);
+                       btrfs_err(fs_info,
+                       "invalid refs_to_mod for tree block %llu, has %d expect -1",
+                                 key.objectid, refs_to_mod);
+                       return -EUCLEAN;
+               }
        }
 
-       BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+       if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
+               struct btrfs_key key;
+               u32 extent_size;
+
+               btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+               if (key.type == BTRFS_METADATA_ITEM_KEY)
+                       extent_size = fs_info->nodesize;
+               else
+                       extent_size = key.offset;
+               btrfs_print_leaf(leaf);
+               btrfs_err(fs_info,
+"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
+                         (unsigned long)iref, key.objectid, extent_size,
+                         refs_to_mod, refs);
+               return -EUCLEAN;
+       }
        refs += refs_to_mod;
 
        if (refs > 0) {
@@ -1142,6 +1177,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
                btrfs_truncate_item(path, item_size, 1);
        }
        btrfs_mark_buffer_dirty(leaf);
+       return 0;
 }
 
 static noinline_for_stack
@@ -1170,7 +1206,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
                                   bytenr, num_bytes, root_objectid, path->slots[0]);
                        return -EUCLEAN;
                }
-               update_inline_extent_backref(path, iref, refs_to_add, extent_op);
+               ret = update_inline_extent_backref(path, iref, refs_to_add, extent_op);
        } else if (ret == -ENOENT) {
                setup_inline_extent_backref(trans->fs_info, path, iref, parent,
                                            root_objectid, owner, offset,
@@ -1190,7 +1226,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
 
        BUG_ON(!is_data && refs_to_drop != 1);
        if (iref)
-               update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+               ret = update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
        else if (is_data)
                ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
        else
@@ -1629,8 +1665,10 @@ again:
        item_size = btrfs_item_size(leaf, path->slots[0]);
 
        if (unlikely(item_size < sizeof(*ei))) {
-               err = -EINVAL;
-               btrfs_print_v0_err(fs_info);
+               err = -EUCLEAN;
+               btrfs_err(fs_info,
+                         "unexpected extent item size, has %u expect >= %zu",
+                         item_size, sizeof(*ei));
                btrfs_abort_transaction(trans, err);
                goto out;
        }
@@ -2751,9 +2789,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
                struct extent_state *cached_state = NULL;
 
                mutex_lock(&fs_info->unused_bg_unpin_mutex);
-               ret = find_first_extent_bit(unpin, 0, &start, &end,
-                                           EXTENT_DIRTY, &cached_state);
-               if (ret) {
+               if (!find_first_extent_bit(unpin, 0, &start, &end,
+                                          EXTENT_DIRTY, &cached_state)) {
                        mutex_unlock(&fs_info->unused_bg_unpin_mutex);
                        break;
                }
@@ -3059,8 +3096,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        leaf = path->nodes[0];
        item_size = btrfs_item_size(leaf, extent_slot);
        if (unlikely(item_size < sizeof(*ei))) {
-               ret = -EINVAL;
-               btrfs_print_v0_err(info);
+               ret = -EUCLEAN;
+               btrfs_err(trans->fs_info,
+                         "unexpected extent item size, has %u expect >= %zu",
+                         item_size, sizeof(*ei));
                btrfs_abort_transaction(trans, ret);
                goto out;
        }
@@ -3351,11 +3390,38 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
 }
 
 enum btrfs_loop_type {
+       /*
+        * Start caching block groups but do not wait for progress or for them
+        * to be done.
+        */
        LOOP_CACHING_NOWAIT,
+
+       /*
+        * If the block group isn't fully cached, wait until its free_space is
+        * at least the amount of space we're waiting for.
+        */
        LOOP_CACHING_WAIT,
+
+       /*
+        * Allow allocations to happen from block groups that do not yet have a
+        * size classification.
+        */
        LOOP_UNSET_SIZE_CLASS,
+
+       /*
+        * Allocate a chunk and then retry the allocation.
+        */
        LOOP_ALLOC_CHUNK,
+
+       /*
+        * Ignore the size class restrictions for this allocation.
+        */
        LOOP_WRONG_SIZE_CLASS,
+
+       /*
+        * Ignore the empty size, only try to allocate the number of bytes
+        * needed for this allocation.
+        */
        LOOP_NO_EMPTY_SIZE,
 };
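
Illustration of how these phases are consumed (search_one_pass() is a hypothetical stand-in; the real driver is find_free_extent_update_loop() below): each pass over all block groups relaxes one constraint before trying again:

	int loop;

	for (loop = LOOP_CACHING_NOWAIT; loop <= LOOP_NO_EMPTY_SIZE; loop++) {
		/* Hypothetical: try every block group under this phase's rules. */
		if (search_one_pass(ffe_ctl, loop) == 0)
			return 0;	/* ffe_ctl->found_offset is set */
	}
	return -ENOSPC;
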
 
@@ -3427,7 +3493,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
  * Helper function for find_free_extent().
  *
  * Return -ENOENT to inform caller that we need fallback to unclustered mode.
- * Return -EAGAIN to inform caller that we need to re-search this block group
  * Return >0 to inform caller that we find nothing
  * Return 0 means we have found a location and set ffe_ctl->found_offset.
  */
@@ -3508,14 +3573,6 @@ refill_cluster:
                        trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
                        return 0;
                }
-       } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
-                  !ffe_ctl->retry_clustered) {
-               spin_unlock(&last_ptr->refill_lock);
-
-               ffe_ctl->retry_clustered = true;
-               btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-                               ffe_ctl->empty_cluster + ffe_ctl->empty_size);
-               return -EAGAIN;
        }
        /*
         * At this point we either didn't find a cluster or we weren't able to
@@ -3530,7 +3587,6 @@ refill_cluster:
 /*
  * Return >0 to inform caller that we find nothing
  * Return 0 when we find a free extent and set ffe_ctl->found_offset
- * Return -EAGAIN to inform caller that we need to re-search this block group
  */
 static int find_free_extent_unclustered(struct btrfs_block_group *bg,
                                        struct find_free_extent_ctl *ffe_ctl)
@@ -3568,25 +3624,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
        offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
                        ffe_ctl->num_bytes, ffe_ctl->empty_size,
                        &ffe_ctl->max_extent_size);
-
-       /*
-        * If we didn't find a chunk, and we haven't failed on this block group
-        * before, and this block group is in the middle of caching and we are
-        * ok with waiting, then go ahead and wait for progress to be made, and
-        * set @retry_unclustered to true.
-        *
-        * If @retry_unclustered is true then we've already waited on this
-        * block group once and should move on to the next block group.
-        */
-       if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
-           ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
-               btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
-                                                     ffe_ctl->empty_size);
-               ffe_ctl->retry_unclustered = true;
-               return -EAGAIN;
-       } else if (!offset) {
+       if (!offset)
                return 1;
-       }
        ffe_ctl->found_offset = offset;
        return 0;
 }
@@ -3600,7 +3639,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
        /* We want to try and use the cluster allocator, so lets look there */
        if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
                ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
-               if (ret >= 0 || ret == -EAGAIN)
+               if (ret >= 0)
                        return ret;
                /* ret == -ENOENT case falls through */
        }
@@ -3685,7 +3724,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
        }
        spin_unlock(&block_group->lock);
 
-       if (!ret && !btrfs_zone_activate(block_group)) {
+       /* Metadata block group is activated at write time. */
+       if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
+           !btrfs_zone_activate(block_group)) {
                ret = 1;
                /*
                 * May need to clear fs_info->{treelog,data_reloc}_bg.
@@ -3709,7 +3750,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
               fs_info->data_reloc_bg == 0);
 
        if (block_group->ro ||
-           test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+           (!ffe_ctl->for_data_reloc &&
+            test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
                ret = 1;
                goto out;
        }
@@ -3752,8 +3794,26 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
        if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
                fs_info->treelog_bg = block_group->start;
 
-       if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
-               fs_info->data_reloc_bg = block_group->start;
+       if (ffe_ctl->for_data_reloc) {
+               if (!fs_info->data_reloc_bg)
+                       fs_info->data_reloc_bg = block_group->start;
+               /*
+                * Do not allow allocations from this block group, unless it is
+                * for data relocation. Compared to increasing the ->ro, setting
+                * the ->zoned_data_reloc_ongoing flag still allows nocow
+                * writers to come in. See btrfs_inc_nocow_writers().
+                *
+                * We need to disable allocations here to avoid allocating a
+                * regular (non-relocation data) extent. With a mix of relocation
+                * extents and regular extents, we can dispatch WRITE commands
+                * (for relocation extents) and ZONE APPEND commands (for
+                * regular extents) at the same time to the same zone, which
+                * easily breaks the write pointer.
+                *
+                * Also, this flag prevents this block group from being zone
+                * finished.
+                */
+               set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+       }
 
        ffe_ctl->found_offset = start + block_group->alloc_offset;
        block_group->alloc_offset += num_bytes;
@@ -3771,24 +3831,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
 out:
        if (ret && ffe_ctl->for_treelog)
                fs_info->treelog_bg = 0;
-       if (ret && ffe_ctl->for_data_reloc &&
-           fs_info->data_reloc_bg == block_group->start) {
-               /*
-                * Do not allow further allocations from this block group.
-                * Compared to increasing the ->ro, setting the
-                * ->zoned_data_reloc_ongoing flag still allows nocow
-                *  writers to come in. See btrfs_inc_nocow_writers().
-                *
-                * We need to disable an allocation to avoid an allocation of
-                * regular (non-relocation data) extent. With mix of relocation
-                * extents and regular extents, we can dispatch WRITE commands
-                * (for relocation extents) and ZONE APPEND commands (for
-                * regular extents) at the same time to the same zone, which
-                * easily break the write pointer.
-                */
-               set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+       if (ret && ffe_ctl->for_data_reloc)
                fs_info->data_reloc_bg = 0;
-       }
        spin_unlock(&fs_info->relocation_bg_lock);
        spin_unlock(&fs_info->treelog_bg_lock);
        spin_unlock(&block_group->lock);
@@ -3816,8 +3860,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
 {
        switch (ffe_ctl->policy) {
        case BTRFS_EXTENT_ALLOC_CLUSTERED:
-               ffe_ctl->retry_clustered = false;
-               ffe_ctl->retry_unclustered = false;
+               ffe_ctl->retry_uncached = false;
                break;
        case BTRFS_EXTENT_ALLOC_ZONED:
                /* Nothing to do */
@@ -3861,6 +3904,10 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
 static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
                                    struct find_free_extent_ctl *ffe_ctl)
 {
+       /*
+        * Being active is not a requirement for METADATA block groups; they
+        * are activated at write time instead.
+        */
+       if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
+               return 0;
+
        /* If we can activate new zone, just allocate a chunk and use it */
        if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
                return 0;
@@ -3949,15 +3996,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
        if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
                return 1;
 
-       /*
-        * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
-        *                      caching kthreads as we move along
-        * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
-        * LOOP_UNSET_SIZE_CLASS, allow unset size class
-        * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
-        * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
-        *                     again
-        */
+       /* See the comments for btrfs_loop_type for an explanation of the phases. */
        if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
                ffe_ctl->index = 0;
                /*
@@ -4168,9 +4207,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
        ffe_ctl->orig_have_caching_bg = false;
        ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
        ffe_ctl->loop = 0;
-       /* For clustered allocation */
-       ffe_ctl->retry_clustered = false;
-       ffe_ctl->retry_unclustered = false;
+       ffe_ctl->retry_uncached = false;
        ffe_ctl->cached = 0;
        ffe_ctl->max_extent_size = 0;
        ffe_ctl->total_free_space = 0;
@@ -4310,24 +4347,23 @@ have_block_group:
                        ret = 0;
                }
 
-               if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+               if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
+                       if (!cache_block_group_error)
+                               cache_block_group_error = -EIO;
                        goto loop;
+               }
 
                if (!find_free_extent_check_size_class(ffe_ctl, block_group))
                        goto loop;
 
                bg_ret = NULL;
                ret = do_allocation(block_group, ffe_ctl, &bg_ret);
-               if (ret == 0) {
-                       if (bg_ret && bg_ret != block_group) {
-                               btrfs_release_block_group(block_group,
-                                                         ffe_ctl->delalloc);
-                               block_group = bg_ret;
-                       }
-               } else if (ret == -EAGAIN) {
-                       goto have_block_group;
-               } else if (ret > 0) {
+               if (ret > 0)
                        goto loop;
+
+               if (bg_ret && bg_ret != block_group) {
+                       btrfs_release_block_group(block_group, ffe_ctl->delalloc);
+                       block_group = bg_ret;
                }
 
                /* Checks */
@@ -4368,6 +4404,15 @@ have_block_group:
                btrfs_release_block_group(block_group, ffe_ctl->delalloc);
                break;
 loop:
+               if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
+                   !ffe_ctl->retry_uncached) {
+                       ffe_ctl->retry_uncached = true;
+                       btrfs_wait_block_group_cache_progress(block_group,
+                                               ffe_ctl->num_bytes +
+                                               ffe_ctl->empty_cluster +
+                                               ffe_ctl->empty_size);
+                       goto have_block_group;
+               }
                release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
                cond_resched();
        }
index 429d5c5..88c249c 100644
@@ -48,16 +48,11 @@ struct find_free_extent_ctl {
        int loop;
 
        /*
-        * Whether we're refilling a cluster, if true we need to re-search
-        * current block group but don't try to refill the cluster again.
+        * Set to true if we're retrying the allocation on this block group
+        * after waiting for caching progress, this is so that we retry only
+        * once before moving on to another block group.
         */
-       bool retry_clustered;
-
-       /*
-        * Whether we're updating free space cache, if true we need to re-search
-        * current block group but don't try updating free space cache again.
-        */
-       bool retry_unclustered;
+       bool retry_uncached;
 
        /* If current block group is cached */
        int cached;
@@ -96,9 +91,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
                                     enum btrfs_inline_ref_type is_data);
 u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
 
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
-                             u64 start, u64 num_bytes);
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count);
 void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
                                  struct btrfs_delayed_ref_root *delayed_refs,
index a91d5ad..ac3fca5 100644
@@ -181,34 +181,9 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
        }
 }
 
-void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
-{
-       struct address_space *mapping = inode->i_mapping;
-       unsigned long index = start >> PAGE_SHIFT;
-       unsigned long end_index = end >> PAGE_SHIFT;
-       struct folio *folio;
-
-       while (index <= end_index) {
-               folio = filemap_get_folio(mapping, index);
-               filemap_dirty_folio(mapping, folio);
-               folio_account_redirty(folio);
-               index += folio_nr_pages(folio);
-               folio_put(folio);
-       }
-}
-
-/*
- * Process one page for __process_pages_contig().
- *
- * Return >0 if we hit @page == @locked_page.
- * Return 0 if we updated the page status.
- * Return -EGAIN if the we need to try again.
- * (For PAGE_LOCK case but got dirty page or page not belong to mapping)
- */
-static int process_one_page(struct btrfs_fs_info *fs_info,
-                           struct address_space *mapping,
-                           struct page *page, struct page *locked_page,
-                           unsigned long page_ops, u64 start, u64 end)
+static void process_one_page(struct btrfs_fs_info *fs_info,
+                            struct page *page, struct page *locked_page,
+                            unsigned long page_ops, u64 start, u64 end)
 {
        u32 len;
 
@@ -224,94 +199,36 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
        if (page_ops & PAGE_END_WRITEBACK)
                btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
 
-       if (page == locked_page)
-               return 1;
-
-       if (page_ops & PAGE_LOCK) {
-               int ret;
-
-               ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
-               if (ret)
-                       return ret;
-               if (!PageDirty(page) || page->mapping != mapping) {
-                       btrfs_page_end_writer_lock(fs_info, page, start, len);
-                       return -EAGAIN;
-               }
-       }
-       if (page_ops & PAGE_UNLOCK)
+       if (page != locked_page && (page_ops & PAGE_UNLOCK))
                btrfs_page_end_writer_lock(fs_info, page, start, len);
-       return 0;
 }
 
-static int __process_pages_contig(struct address_space *mapping,
-                                 struct page *locked_page,
-                                 u64 start, u64 end, unsigned long page_ops,
-                                 u64 *processed_end)
+static void __process_pages_contig(struct address_space *mapping,
+                                  struct page *locked_page, u64 start, u64 end,
+                                  unsigned long page_ops)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
        pgoff_t start_index = start >> PAGE_SHIFT;
        pgoff_t end_index = end >> PAGE_SHIFT;
        pgoff_t index = start_index;
-       unsigned long pages_processed = 0;
        struct folio_batch fbatch;
-       int err = 0;
        int i;
 
-       if (page_ops & PAGE_LOCK) {
-               ASSERT(page_ops == PAGE_LOCK);
-               ASSERT(processed_end && *processed_end == start);
-       }
-
        folio_batch_init(&fbatch);
        while (index <= end_index) {
                int found_folios;
 
                found_folios = filemap_get_folios_contig(mapping, &index,
                                end_index, &fbatch);
-
-               if (found_folios == 0) {
-                       /*
-                        * Only if we're going to lock these pages, we can find
-                        * nothing at @index.
-                        */
-                       ASSERT(page_ops & PAGE_LOCK);
-                       err = -EAGAIN;
-                       goto out;
-               }
-
                for (i = 0; i < found_folios; i++) {
-                       int process_ret;
                        struct folio *folio = fbatch.folios[i];
-                       process_ret = process_one_page(fs_info, mapping,
-                                       &folio->page, locked_page, page_ops,
-                                       start, end);
-                       if (process_ret < 0) {
-                               err = -EAGAIN;
-                               folio_batch_release(&fbatch);
-                               goto out;
-                       }
-                       pages_processed += folio_nr_pages(folio);
+
+                       process_one_page(fs_info, &folio->page, locked_page,
+                                        page_ops, start, end);
                }
                folio_batch_release(&fbatch);
                cond_resched();
        }
-out:
-       if (err && processed_end) {
-               /*
-                * Update @processed_end. I know this is awful since it has
-                * two different return value patterns (inclusive vs exclusive).
-                *
-                * But the exclusive pattern is necessary if @start is 0, or we
-                * underflow and check against processed_end won't work as
-                * expected.
-                */
-               if (pages_processed)
-                       *processed_end = min(end,
-                       ((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
-               else
-                       *processed_end = start;
-       }
-       return err;
 }
 
 static noinline void __unlock_for_delalloc(struct inode *inode,
@@ -326,29 +243,63 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
                return;
 
        __process_pages_contig(inode->i_mapping, locked_page, start, end,
-                              PAGE_UNLOCK, NULL);
+                              PAGE_UNLOCK);
 }
 
 static noinline int lock_delalloc_pages(struct inode *inode,
                                        struct page *locked_page,
-                                       u64 delalloc_start,
-                                       u64 delalloc_end)
+                                       u64 start,
+                                       u64 end)
 {
-       unsigned long index = delalloc_start >> PAGE_SHIFT;
-       unsigned long end_index = delalloc_end >> PAGE_SHIFT;
-       u64 processed_end = delalloc_start;
-       int ret;
+       struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct address_space *mapping = inode->i_mapping;
+       pgoff_t start_index = start >> PAGE_SHIFT;
+       pgoff_t end_index = end >> PAGE_SHIFT;
+       pgoff_t index = start_index;
+       u64 processed_end = start;
+       struct folio_batch fbatch;
 
-       ASSERT(locked_page);
        if (index == locked_page->index && index == end_index)
                return 0;
 
-       ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
-                                    delalloc_end, PAGE_LOCK, &processed_end);
-       if (ret == -EAGAIN && processed_end > delalloc_start)
-               __unlock_for_delalloc(inode, locked_page, delalloc_start,
-                                     processed_end);
-       return ret;
+       folio_batch_init(&fbatch);
+       while (index <= end_index) {
+               unsigned int found_folios, i;
+
+               found_folios = filemap_get_folios_contig(mapping, &index,
+                               end_index, &fbatch);
+               if (found_folios == 0)
+                       goto out;
+
+               for (i = 0; i < found_folios; i++) {
+                       struct page *page = &fbatch.folios[i]->page;
+                       u32 len = end + 1 - start;
+
+                       if (page == locked_page)
+                               continue;
+
+                       if (btrfs_page_start_writer_lock(fs_info, page, start,
+                                                        len))
+                               goto out;
+
+                       if (!PageDirty(page) || page->mapping != mapping) {
+                               btrfs_page_end_writer_lock(fs_info, page, start,
+                                                          len);
+                               goto out;
+                       }
+
+                       processed_end = page_offset(page) + PAGE_SIZE - 1;
+               }
+               folio_batch_release(&fbatch);
+               cond_resched();
+       }
+
+       return 0;
+out:
+       folio_batch_release(&fbatch);
+       if (processed_end > start)
+               __unlock_for_delalloc(inode, locked_page, start, processed_end);
+       return -EAGAIN;
 }
 
 /*
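
A condensed sketch (not verbatim) of how the caller is expected to handle the -EAGAIN above; lock_delalloc_pages() has already unlocked everything it locked, so find_lock_delalloc_range() only needs to shrink the range and retry:

	ret = lock_delalloc_pages(inode, locked_page, delalloc_start,
				  delalloc_end);
	if (ret == -EAGAIN) {
		/* Some pages are gone; retry once with a one-page range. */
		delalloc_end = delalloc_start + PAGE_SIZE - 1;
		goto again;	/* assumed retry label in the caller */
	}
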
@@ -467,7 +418,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
        clear_extent_bit(&inode->io_tree, start, end, clear_bits, NULL);
 
        __process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
-                              start, end, page_ops, NULL);
+                              start, end, page_ops);
 }
 
 static bool btrfs_verify_page(struct page *page, u64 start)
@@ -497,31 +448,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
                btrfs_subpage_end_reader(fs_info, page, start, len);
 }
 
-/* lots and lots of room for performance fixes in the end_bio funcs */
-
-void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
-{
-       struct btrfs_inode *inode;
-       const bool uptodate = (err == 0);
-       int ret = 0;
-
-       ASSERT(page && page->mapping);
-       inode = BTRFS_I(page->mapping->host);
-       btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
-
-       if (!uptodate) {
-               const struct btrfs_fs_info *fs_info = inode->root->fs_info;
-               u32 len;
-
-               ASSERT(end + 1 - start <= U32_MAX);
-               len = end + 1 - start;
-
-               btrfs_page_clear_uptodate(fs_info, page, start, len);
-               ret = err < 0 ? err : -EIO;
-               mapping_set_error(page->mapping, ret);
-       }
-}
-
 /*
  * after a writepage IO is done, we need to:
  * clear the uptodate bits on error
@@ -902,7 +828,30 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
                size -= len;
                pg_offset += len;
                disk_bytenr += len;
-               bio_ctrl->len_to_oe_boundary -= len;
+
+               /*
+                * len_to_oe_boundary defaults to U32_MAX, which isn't page or
+                * sector aligned.  alloc_new_bio() then sets it to the end of
+                * our ordered extent for writes into zoned devices.
+                *
+                * When len_to_oe_boundary is tracking an ordered extent, we
+                * trust the ordered extent code to align things properly, and
+                * the check above to cap our write to the ordered extent
+                * boundary is correct.
+                *
+                * When len_to_oe_boundary is U32_MAX, the cap above would
+                * result in a 4095 byte IO for the last page right before
+                * we hit the bio limit of UINT_MAX.  bio_add_page() has all
+                * the checks required to make sure we don't overflow the bio,
+                * and we should just ignore len_to_oe_boundary completely
+                * unless we're using it to track an ordered extent.
+                *
+                * It's pretty hard to make a bio sized U32_MAX, but it can
+                * happen when the page cache is able to feed us contiguous
+                * pages for large extents.
+                */
+               if (bio_ctrl->len_to_oe_boundary != U32_MAX)
+                       bio_ctrl->len_to_oe_boundary -= len;
 
                /* Ordered extent boundary: move on to a new bio. */
                if (bio_ctrl->len_to_oe_boundary == 0)
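
A worked instance of the overflow the comment above guards against (assuming 4 KiB pages): len_to_oe_boundary starts at U32_MAX = 4294967295, which is not a multiple of 4096. After 1048575 full pages it would have decremented to 4095, so the boundary cap would queue a misaligned 4095-byte write for the final page. Skipping the decrement whenever the boundary is U32_MAX means that cap is never hit.
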
@@ -1220,38 +1169,45 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
 static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                struct page *page, struct writeback_control *wbc)
 {
-       const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
-       u64 delalloc_start = page_offset(page);
+       const u64 page_start = page_offset(page);
+       const u64 page_end = page_start + PAGE_SIZE - 1;
+       u64 delalloc_start = page_start;
+       u64 delalloc_end = page_end;
        u64 delalloc_to_write = 0;
-       /* How many pages are started by btrfs_run_delalloc_range() */
-       unsigned long nr_written = 0;
-       int ret;
-       int page_started = 0;
+       int ret = 0;
 
        while (delalloc_start < page_end) {
-               u64 delalloc_end = page_end;
-               bool found;
-
-               found = find_lock_delalloc_range(&inode->vfs_inode, page,
-                                              &delalloc_start,
-                                              &delalloc_end);
-               if (!found) {
+               delalloc_end = page_end;
+               if (!find_lock_delalloc_range(&inode->vfs_inode, page,
+                                             &delalloc_start, &delalloc_end)) {
                        delalloc_start = delalloc_end + 1;
                        continue;
                }
+
                ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
-                               delalloc_end, &page_started, &nr_written, wbc);
-               if (ret)
+                                              delalloc_end, wbc);
+               if (ret < 0)
                        return ret;
 
-               /*
-                * delalloc_end is already one less than the total length, so
-                * we don't subtract one from PAGE_SIZE
-                */
-               delalloc_to_write += (delalloc_end - delalloc_start +
-                                     PAGE_SIZE) >> PAGE_SHIFT;
                delalloc_start = delalloc_end + 1;
        }
+
+       /*
+        * delalloc_end is inclusive (one less than the total length), hence
+        * the +1 when computing the number of pages to write.
+        */
+       delalloc_to_write +=
+               DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
+
+       /*
+        * If btrfs_run_delalloc_range() already started I/O and unlocked
+        * the pages, we just need to account for them here.
+        */
+       if (ret == 1) {
+               wbc->nr_to_write -= delalloc_to_write;
+               return 1;
+       }
+
        if (wbc->nr_to_write < delalloc_to_write) {
                int thresh = 8192;
 
@@ -1261,16 +1217,6 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
                                         thresh);
        }
 
-       /* Did btrfs_run_dealloc_range() already unlock and start the IO? */
-       if (page_started) {
-               /*
-                * We've unlocked the page, so we can't update the mapping's
-                * writeback index, just update nr_to_write.
-                */
-               wbc->nr_to_write -= nr_written;
-               return 1;
-       }
-
        return 0;
 }
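
A quick worked example of the DIV_ROUND_UP() accounting above (assuming 4 KiB pages): with page_start == 0 and a final delalloc_end == 12287, DIV_ROUND_UP(12287 + 1 - 0, 4096) == 3, so three pages are charged to delalloc_to_write whether or not btrfs_run_delalloc_range() started the I/O itself.
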
 
@@ -1359,6 +1305,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 
        bio_ctrl->end_io_func = end_bio_extent_writepage;
        while (cur <= end) {
+               u32 len = end - cur + 1;
                u64 disk_bytenr;
                u64 em_end;
                u64 dirty_range_start = cur;
@@ -1366,8 +1313,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                u32 iosize;
 
                if (cur >= i_size) {
-                       btrfs_writepage_endio_finish_ordered(inode, page, cur,
-                                                            end, true);
+                       btrfs_mark_ordered_io_finished(inode, page, cur, len,
+                                                      true);
                        /*
                         * This range is beyond i_size, thus we don't need to
                         * bother writing back.
@@ -1376,7 +1323,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                         * writeback the sectors with subpage dirty bits,
                         * causing writeback without ordered extent.
                         */
-                       btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
+                       btrfs_page_clear_dirty(fs_info, page, cur, len);
                        break;
                }
 
@@ -1387,7 +1334,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                        continue;
                }
 
-               em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
+               em = btrfs_get_extent(inode, NULL, 0, cur, len);
                if (IS_ERR(em)) {
                        ret = PTR_ERR_OR_ZERO(em);
                        goto out_error;
@@ -1463,7 +1410,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
        struct folio *folio = page_folio(page);
        struct inode *inode = page->mapping->host;
        const u64 page_start = page_offset(page);
-       const u64 page_end = page_start + PAGE_SIZE - 1;
        int ret;
        int nr = 0;
        size_t pg_offset;
@@ -1507,8 +1453,13 @@ done:
                set_page_writeback(page);
                end_page_writeback(page);
        }
-       if (ret)
-               end_extent_writepage(page, ret, page_start, page_end);
+       if (ret) {
+               btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start,
+                                              PAGE_SIZE, !ret);
+               btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page,
+                                         page_start, PAGE_SIZE);
+               mapping_set_error(page->mapping, ret);
+       }
        unlock_page(page);
        ASSERT(ret <= 0);
        return ret;
@@ -1854,11 +1805,10 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
  * previous call.
  * Return <0 for fatal error.
  */
-static int submit_eb_page(struct page *page, struct writeback_control *wbc,
-                         struct extent_buffer **eb_context)
+static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
 {
+       struct writeback_control *wbc = ctx->wbc;
        struct address_space *mapping = page->mapping;
-       struct btrfs_block_group *cache = NULL;
        struct extent_buffer *eb;
        int ret;
 
@@ -1885,7 +1835,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
                return 0;
        }
 
-       if (eb == *eb_context) {
+       if (eb == ctx->eb) {
                spin_unlock(&mapping->private_lock);
                return 0;
        }
@@ -1894,34 +1844,25 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
        if (!ret)
                return 0;
 
-       if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
-               /*
-                * If for_sync, this hole will be filled with
-                * trasnsaction commit.
-                */
-               if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
-                       ret = -EAGAIN;
-               else
+       ctx->eb = eb;
+
+       ret = btrfs_check_meta_write_pointer(eb->fs_info, ctx);
+       if (ret) {
+               if (ret == -EBUSY)
                        ret = 0;
                free_extent_buffer(eb);
                return ret;
        }
 
-       *eb_context = eb;
-
        if (!lock_extent_buffer_for_io(eb, wbc)) {
-               btrfs_revert_meta_write_pointer(cache, eb);
-               if (cache)
-                       btrfs_put_block_group(cache);
                free_extent_buffer(eb);
                return 0;
        }
-       if (cache) {
-               /*
-                * Implies write in zoned mode. Mark the last eb in a block group.
-                */
-               btrfs_schedule_zone_finish_bg(cache, eb);
-               btrfs_put_block_group(cache);
+       /* Implies write in zoned mode. */
+       if (ctx->zoned_bg) {
+               /* Mark the last eb in the block group. */
+               btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
+               ctx->zoned_bg->meta_write_pointer += eb->len;
        }
        write_one_eb(eb, wbc);
        free_extent_buffer(eb);
@@ -1931,7 +1872,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 int btree_write_cache_pages(struct address_space *mapping,
                                   struct writeback_control *wbc)
 {
-       struct extent_buffer *eb_context = NULL;
+       struct btrfs_eb_write_context ctx = { .wbc = wbc };
        struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
        int ret = 0;
        int done = 0;
@@ -1973,7 +1914,7 @@ retry:
                for (i = 0; i < nr_folios; i++) {
                        struct folio *folio = fbatch.folios[i];
 
-                       ret = submit_eb_page(&folio->page, wbc, &eb_context);
+                       ret = submit_eb_page(&folio->page, &ctx);
                        if (ret == 0)
                                continue;
                        if (ret < 0) {
@@ -2034,6 +1975,9 @@ retry:
                ret = 0;
        if (!ret && BTRFS_FS_ERROR(fs_info))
                ret = -EROFS;
+
+       if (ctx.zoned_bg)
+               btrfs_put_block_group(ctx.zoned_bg);
        btrfs_zoned_meta_io_unlock(fs_info);
        return ret;
 }
@@ -2127,7 +2071,7 @@ retry:
                for (i = 0; i < nr_folios; i++) {
                        struct folio *folio = fbatch.folios[i];
 
-                       done_index = folio->index + folio_nr_pages(folio);
+                       done_index = folio_next_index(folio);
                        /*
                         * At this point we hold neither the i_pages lock nor
                         * the page lock: the page may be truncated or
@@ -2145,6 +2089,12 @@ retry:
                                continue;
                        }
 
+                       if (!folio_test_dirty(folio)) {
+                               /* Someone wrote it for us. */
+                               folio_unlock(folio);
+                               continue;
+                       }
+
                        if (wbc->sync_mode != WB_SYNC_NONE) {
                                if (folio_test_writeback(folio))
                                        submit_write_bio(bio_ctrl, 0);
@@ -2164,11 +2114,12 @@ retry:
                        }
 
                        /*
-                        * the filesystem may choose to bump up nr_to_write.
+                        * The filesystem may choose to bump up nr_to_write.
                         * We have to make sure to honor the new nr_to_write
-                        * at any time
+                        * at any time.
                         */
-                       nr_to_write_done = wbc->nr_to_write <= 0;
+                       nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
+                                           wbc->nr_to_write <= 0);
                }
                folio_batch_release(&fbatch);
                cond_resched();
@@ -2203,11 +2154,11 @@ retry:
  * already been ran (aka, ordered extent inserted) and all pages are still
  * locked.
  */
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-                             struct writeback_control *wbc)
+void extent_write_locked_range(struct inode *inode, struct page *locked_page,
+                              u64 start, u64 end, struct writeback_control *wbc,
+                              bool pages_dirty)
 {
        bool found_error = false;
-       int first_error = 0;
        int ret = 0;
        struct address_space *mapping = inode->i_mapping;
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2226,18 +2177,16 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
 
        while (cur <= end) {
                u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+               u32 cur_len = cur_end + 1 - cur;
                struct page *page;
                int nr = 0;
 
                page = find_get_page(mapping, cur >> PAGE_SHIFT);
-               /*
-                * All pages in the range are locked since
-                * btrfs_run_delalloc_range(), thus there is no way to clear
-                * the page dirty flag.
-                */
                ASSERT(PageLocked(page));
-               ASSERT(PageDirty(page));
-               clear_page_dirty_for_io(page);
+               if (pages_dirty && page != locked_page) {
+                       ASSERT(PageDirty(page));
+                       clear_page_dirty_for_io(page);
+               }
 
                ret = __extent_writepage_io(BTRFS_I(inode), page, &bio_ctrl,
                                            i_size, &nr);
@@ -2249,23 +2198,21 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
                        set_page_writeback(page);
                        end_page_writeback(page);
                }
-               if (ret)
-                       end_extent_writepage(page, ret, cur, cur_end);
-               btrfs_page_unlock_writer(fs_info, page, cur, cur_end + 1 - cur);
-               if (ret < 0) {
-                       found_error = true;
-                       first_error = ret;
+               if (ret) {
+                       btrfs_mark_ordered_io_finished(BTRFS_I(inode), page,
+                                                      cur, cur_len, !ret);
+                       btrfs_page_clear_uptodate(fs_info, page, cur, cur_len);
+                       mapping_set_error(page->mapping, ret);
                }
+               btrfs_page_unlock_writer(fs_info, page, cur, cur_len);
+               if (ret < 0)
+                       found_error = true;
 next_page:
                put_page(page);
                cur = cur_end + 1;
        }
 
        submit_write_bio(&bio_ctrl, found_error ? ret : 0);
-
-       if (found_error)
-               return first_error;
-       return ret;
 }
 
 int extent_writepages(struct address_space *mapping,
@@ -3285,8 +3232,8 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
                        return NULL;
                }
                WARN_ON(PageDirty(p));
-               copy_page(page_address(p), page_address(src->pages[i]));
        }
+       copy_extent_buffer_full(new, src);
        set_extent_buffer_uptodate(new);
 
        return new;
@@ -3529,6 +3476,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
        struct extent_buffer *exists = NULL;
        struct page *p;
        struct address_space *mapping = fs_info->btree_inode->i_mapping;
+       struct btrfs_subpage *prealloc = NULL;
        u64 lockdep_owner = owner_root;
        int uptodate = 1;
        int ret;
@@ -3565,36 +3513,30 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
        btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
 
        num_pages = num_extent_pages(eb);
-       for (i = 0; i < num_pages; i++, index++) {
-               struct btrfs_subpage *prealloc = NULL;
 
+       /*
+        * Preallocate page->private for the subpage case, so that we won't
+        * allocate memory with either private_lock or the page lock held.
+        *
+        * The memory will be freed by attach_extent_buffer_page() or freed
+        * manually if we exit earlier.
+        */
+       if (fs_info->nodesize < PAGE_SIZE) {
+               prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+               if (IS_ERR(prealloc)) {
+                       exists = ERR_CAST(prealloc);
+                       goto free_eb;
+               }
+       }
+
+       for (i = 0; i < num_pages; i++, index++) {
                p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
                if (!p) {
                        exists = ERR_PTR(-ENOMEM);
+                       btrfs_free_subpage(prealloc);
                        goto free_eb;
                }
 
-               /*
-                * Preallocate page->private for subpage case, so that we won't
-                * allocate memory with private_lock hold.  The memory will be
-                * freed by attach_extent_buffer_page() or freed manually if
-                * we exit earlier.
-                *
-                * Although we have ensured one subpage eb can only have one
-                * page, but it may change in the future for 16K page size
-                * support, so we still preallocate the memory in the loop.
-                */
-               if (fs_info->nodesize < PAGE_SIZE) {
-                       prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
-                       if (IS_ERR(prealloc)) {
-                               ret = PTR_ERR(prealloc);
-                               unlock_page(p);
-                               put_page(p);
-                               exists = ERR_PTR(ret);
-                               goto free_eb;
-                       }
-               }
-
                spin_lock(&mapping->private_lock);
                exists = grab_extent_buffer(fs_info, p);
                if (exists) {
@@ -4180,30 +4122,9 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
        }
 }
 
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
-               const void *srcv)
-{
-       char *kaddr;
-
-       assert_eb_page_uptodate(eb, eb->pages[0]);
-       kaddr = page_address(eb->pages[0]) +
-               get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
-                                                  chunk_tree_uuid));
-       memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
-{
-       char *kaddr;
-
-       assert_eb_page_uptodate(eb, eb->pages[0]);
-       kaddr = page_address(eb->pages[0]) +
-               get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
-       memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
-void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
-                        unsigned long start, unsigned long len)
+static void __write_extent_buffer(const struct extent_buffer *eb,
+                                 const void *srcv, unsigned long start,
+                                 unsigned long len, bool use_memmove)
 {
        size_t cur;
        size_t offset;
@@ -4211,6 +4132,8 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
        char *kaddr;
        char *src = (char *)srcv;
        unsigned long i = get_eb_page_index(start);
+       /* For unmapped (dummy) ebs, no need to check their uptodate status. */
+       const bool check_uptodate = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
 
        WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
 
@@ -4221,11 +4144,15 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
 
        while (len > 0) {
                page = eb->pages[i];
-               assert_eb_page_uptodate(eb, page);
+               if (check_uptodate)
+                       assert_eb_page_uptodate(eb, page);
 
                cur = min(len, PAGE_SIZE - offset);
                kaddr = page_address(page);
-               memcpy(kaddr + offset, src, cur);
+               if (use_memmove)
+                       memmove(kaddr + offset, src, cur);
+               else
+                       memcpy(kaddr + offset, src, cur);
 
                src += cur;
                len -= cur;
@@ -4234,55 +4161,54 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
        }
 }
 
-void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
-               unsigned long len)
+void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
+                        unsigned long start, unsigned long len)
 {
-       size_t cur;
-       size_t offset;
-       struct page *page;
-       char *kaddr;
-       unsigned long i = get_eb_page_index(start);
+       return __write_extent_buffer(eb, srcv, start, len, false);
+}
 
-       if (check_eb_range(eb, start, len))
-               return;
+static void memset_extent_buffer(const struct extent_buffer *eb, int c,
+                                unsigned long start, unsigned long len)
+{
+       unsigned long cur = start;
 
-       offset = get_eb_offset_in_page(eb, start);
+       while (cur < start + len) {
+               unsigned long index = get_eb_page_index(cur);
+               unsigned int offset = get_eb_offset_in_page(eb, cur);
+               unsigned int cur_len = min(start + len - cur, PAGE_SIZE - offset);
+               struct page *page = eb->pages[index];
 
-       while (len > 0) {
-               page = eb->pages[i];
                assert_eb_page_uptodate(eb, page);
+               memset(page_address(page) + offset, c, cur_len);
 
-               cur = min(len, PAGE_SIZE - offset);
-               kaddr = page_address(page);
-               memset(kaddr + offset, 0, cur);
-
-               len -= cur;
-               offset = 0;
-               i++;
+               cur += cur_len;
        }
 }
 
+void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
+                          unsigned long len)
+{
+       if (check_eb_range(eb, start, len))
+               return;
+       return memset_extent_buffer(eb, 0, start, len);
+}
+
 void copy_extent_buffer_full(const struct extent_buffer *dst,
                             const struct extent_buffer *src)
 {
-       int i;
-       int num_pages;
+       unsigned long cur = 0;
 
        ASSERT(dst->len == src->len);
 
-       if (dst->fs_info->nodesize >= PAGE_SIZE) {
-               num_pages = num_extent_pages(dst);
-               for (i = 0; i < num_pages; i++)
-                       copy_page(page_address(dst->pages[i]),
-                                 page_address(src->pages[i]));
-       } else {
-               size_t src_offset = get_eb_offset_in_page(src, 0);
-               size_t dst_offset = get_eb_offset_in_page(dst, 0);
+       while (cur < src->len) {
+               unsigned long index = get_eb_page_index(cur);
+               unsigned long offset = get_eb_offset_in_page(src, cur);
+               unsigned long cur_len = min(src->len, PAGE_SIZE - offset);
+               void *addr = page_address(src->pages[index]) + offset;
+
+               write_extent_buffer(dst, addr, cur, cur_len);
 
-               ASSERT(src->fs_info->nodesize < PAGE_SIZE);
-               memcpy(page_address(dst->pages[0]) + dst_offset,
-                      page_address(src->pages[0]) + src_offset,
-                      src->len);
+               cur += cur_len;
        }
 }
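
The rewritten memset_extent_buffer() (and the copy loop above it) share one chunking idiom: advance a cursor and clamp each chunk to whichever ends first, the remaining range or the current page. A minimal userspace sketch of that arithmetic follows; PAGE_SZ, the two-page buffer, and demo_memset() are invented for illustration, not btrfs code.

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SZ 16UL

static unsigned char pages[2][PAGE_SZ];

static void demo_memset(unsigned long start, unsigned long len, int c)
{
	unsigned long cur = start;

	while (cur < start + len) {
		unsigned long index = cur / PAGE_SZ;   /* get_eb_page_index() analogue */
		unsigned long offset = cur % PAGE_SZ;  /* get_eb_offset_in_page() analogue */
		unsigned long cur_len = start + len - cur;

		if (cur_len > PAGE_SZ - offset)        /* clamp to the page boundary */
			cur_len = PAGE_SZ - offset;

		memset(&pages[index][offset], c, cur_len);
		cur += cur_len;
	}
}

int main(void)
{
	demo_memset(10, 12, 0xff);                     /* bytes 10..21 cross a page */
	assert(pages[0][9] == 0x00 && pages[0][10] == 0xff);
	assert(pages[1][5] == 0xff && pages[1][6] == 0x00);
	printf("chunked memset across pages ok\n");
	return 0;
}
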
 
@@ -4376,6 +4302,15 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
        return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
 }
 
+static u8 *extent_buffer_get_byte(const struct extent_buffer *eb, unsigned long bytenr)
+{
+       unsigned long index = get_eb_page_index(bytenr);
+
+       if (check_eb_range(eb, bytenr, 1))
+               return NULL;
+       return page_address(eb->pages[index]) + get_eb_offset_in_page(eb, bytenr);
+}
+
 /*
  * Set an area of a bitmap to 1.
  *
@@ -4387,35 +4322,28 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
 void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
                              unsigned long pos, unsigned long len)
 {
+       unsigned int first_byte = start + BIT_BYTE(pos);
+       unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+       const bool same_byte = (first_byte == last_byte);
+       u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
        u8 *kaddr;
-       struct page *page;
-       unsigned long i;
-       size_t offset;
-       const unsigned int size = pos + len;
-       int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-       u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
 
-       eb_bitmap_offset(eb, start, pos, &i, &offset);
-       page = eb->pages[i];
-       assert_eb_page_uptodate(eb, page);
-       kaddr = page_address(page);
+       if (same_byte)
+               mask &= BITMAP_LAST_BYTE_MASK(pos + len);
 
-       while (len >= bits_to_set) {
-               kaddr[offset] |= mask_to_set;
-               len -= bits_to_set;
-               bits_to_set = BITS_PER_BYTE;
-               mask_to_set = ~0;
-               if (++offset >= PAGE_SIZE && len > 0) {
-                       offset = 0;
-                       page = eb->pages[++i];
-                       assert_eb_page_uptodate(eb, page);
-                       kaddr = page_address(page);
-               }
-       }
-       if (len) {
-               mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
-               kaddr[offset] |= mask_to_set;
-       }
+       /* Handle the first byte. */
+       kaddr = extent_buffer_get_byte(eb, first_byte);
+       *kaddr |= mask;
+       if (same_byte)
+               return;
+
+       /* Handle the byte aligned part. */
+       ASSERT(first_byte + 1 <= last_byte);
+       memset_extent_buffer(eb, 0xff, first_byte + 1, last_byte - first_byte - 1);
+
+       /* Handle the last byte. */
+       kaddr = extent_buffer_get_byte(eb, last_byte);
+       *kaddr |= BITMAP_LAST_BYTE_MASK(pos + len);
 }
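
For wide ranges the rewritten bitmap helper touches at most two partial bytes and memset()s everything in between. Below is a self-contained sketch of the same three-phase scheme, checked against a bit-at-a-time reference; the two local mask helpers only mirror the semantics of BITMAP_FIRST_BYTE_MASK()/BITMAP_LAST_BYTE_MASK() and are not the kernel macros.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t first_byte_mask(unsigned long pos)
{
	return (uint8_t)(0xff << (pos & 7));              /* bits pos%8..7 */
}

static uint8_t last_byte_mask(unsigned long end)
{
	return (uint8_t)(0xff >> ((8 - (end & 7)) & 7));  /* bits 0..(end-1)%8 */
}

static void bitmap_set_range(uint8_t *map, unsigned long pos, unsigned long len)
{
	unsigned long first = pos / 8;
	unsigned long last = (pos + len - 1) / 8;
	uint8_t mask = first_byte_mask(pos);

	if (first == last) {                     /* whole range within one byte */
		map[first] |= mask & last_byte_mask(pos + len);
		return;
	}
	map[first] |= mask;                              /* partial first byte */
	memset(map + first + 1, 0xff, last - first - 1); /* byte-aligned middle */
	map[last] |= last_byte_mask(pos + len);          /* partial last byte */
}

int main(void)
{
	uint8_t fast[8] = { 0 }, slow[8] = { 0 };
	const unsigned long pos = 5, len = 41;

	bitmap_set_range(fast, pos, len);
	for (unsigned long i = pos; i < pos + len; i++)
		slow[i / 8] |= 1U << (i % 8);    /* bit-at-a-time reference */
	assert(memcmp(fast, slow, sizeof(fast)) == 0);
	printf("three-phase bitmap set matches reference\n");
	return 0;
}
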
 
 
@@ -4431,35 +4359,28 @@ void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
                                unsigned long start, unsigned long pos,
                                unsigned long len)
 {
+       unsigned int first_byte = start + BIT_BYTE(pos);
+       unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+       const bool same_byte = (first_byte == last_byte);
+       u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
        u8 *kaddr;
-       struct page *page;
-       unsigned long i;
-       size_t offset;
-       const unsigned int size = pos + len;
-       int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
-       u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
 
-       eb_bitmap_offset(eb, start, pos, &i, &offset);
-       page = eb->pages[i];
-       assert_eb_page_uptodate(eb, page);
-       kaddr = page_address(page);
+       if (same_byte)
+               mask &= BITMAP_LAST_BYTE_MASK(pos + len);
 
-       while (len >= bits_to_clear) {
-               kaddr[offset] &= ~mask_to_clear;
-               len -= bits_to_clear;
-               bits_to_clear = BITS_PER_BYTE;
-               mask_to_clear = ~0;
-               if (++offset >= PAGE_SIZE && len > 0) {
-                       offset = 0;
-                       page = eb->pages[++i];
-                       assert_eb_page_uptodate(eb, page);
-                       kaddr = page_address(page);
-               }
-       }
-       if (len) {
-               mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
-               kaddr[offset] &= ~mask_to_clear;
-       }
+       /* Handle the first byte. */
+       kaddr = extent_buffer_get_byte(eb, first_byte);
+       *kaddr &= ~mask;
+       if (same_byte)
+               return;
+
+       /* Handle the byte aligned part. */
+       ASSERT(first_byte + 1 <= last_byte);
+       memset_extent_buffer(eb, 0, first_byte + 1, last_byte - first_byte - 1);
+
+       /* Handle the last byte. */
+       kaddr = extent_buffer_get_byte(eb, last_byte);
+       *kaddr &= ~BITMAP_LAST_BYTE_MASK(pos + len);
 }
 
 static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -4468,60 +4389,29 @@ static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned
        return distance < len;
 }
 
-static void copy_pages(struct page *dst_page, struct page *src_page,
-                      unsigned long dst_off, unsigned long src_off,
-                      unsigned long len)
-{
-       char *dst_kaddr = page_address(dst_page);
-       char *src_kaddr;
-       int must_memmove = 0;
-
-       if (dst_page != src_page) {
-               src_kaddr = page_address(src_page);
-       } else {
-               src_kaddr = dst_kaddr;
-               if (areas_overlap(src_off, dst_off, len))
-                       must_memmove = 1;
-       }
-
-       if (must_memmove)
-               memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
-       else
-               memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-}
-
 void memcpy_extent_buffer(const struct extent_buffer *dst,
                          unsigned long dst_offset, unsigned long src_offset,
                          unsigned long len)
 {
-       size_t cur;
-       size_t dst_off_in_page;
-       size_t src_off_in_page;
-       unsigned long dst_i;
-       unsigned long src_i;
+       unsigned long cur_off = 0;
 
        if (check_eb_range(dst, dst_offset, len) ||
            check_eb_range(dst, src_offset, len))
                return;
 
-       while (len > 0) {
-               dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
-               src_off_in_page = get_eb_offset_in_page(dst, src_offset);
-
-               dst_i = get_eb_page_index(dst_offset);
-               src_i = get_eb_page_index(src_offset);
-
-               cur = min(len, (unsigned long)(PAGE_SIZE -
-                                              src_off_in_page));
-               cur = min_t(unsigned long, cur,
-                       (unsigned long)(PAGE_SIZE - dst_off_in_page));
-
-               copy_pages(dst->pages[dst_i], dst->pages[src_i],
-                          dst_off_in_page, src_off_in_page, cur);
-
-               src_offset += cur;
-               dst_offset += cur;
-               len -= cur;
+       while (cur_off < len) {
+               unsigned long cur_src = cur_off + src_offset;
+               unsigned long pg_index = get_eb_page_index(cur_src);
+               unsigned long pg_off = get_eb_offset_in_page(dst, cur_src);
+               unsigned long cur_len = min(src_offset + len - cur_src,
+                                           PAGE_SIZE - pg_off);
+               void *src_addr = page_address(dst->pages[pg_index]) + pg_off;
+               const bool use_memmove = areas_overlap(src_offset + cur_off,
+                                                      dst_offset + cur_off, cur_len);
+
+               __write_extent_buffer(dst, src_addr, dst_offset + cur_off, cur_len,
+                                     use_memmove);
+               cur_off += cur_len;
        }
 }
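
memcpy_extent_buffer() now decides per chunk whether plain memcpy() is safe: if |src - dst| < len the byte ranges share bytes and only memmove() may be used. A minimal userspace sketch of that test and its consequence; the buffer and values are invented for the demo.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
{
	unsigned long distance = (src > dst) ? src - dst : dst - src;

	return distance < len;
}

int main(void)
{
	char buf[16] = "abcdefgh";

	assert(areas_overlap(0, 2, 6));   /* [0,6) and [2,8) share bytes */
	assert(!areas_overlap(0, 8, 8));  /* adjacent ranges do not overlap */

	/* Overlapping forward copy: memmove() preserves the source bytes. */
	if (areas_overlap(0, 2, 6))
		memmove(buf + 2, buf, 6);
	else
		memcpy(buf + 2, buf, 6);
	assert(memcmp(buf, "ababcdef", 8) == 0);
	printf("overlap handled with memmove\n");
	return 0;
}
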
 
@@ -4529,23 +4419,26 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
                           unsigned long dst_offset, unsigned long src_offset,
                           unsigned long len)
 {
-       size_t cur;
-       size_t dst_off_in_page;
-       size_t src_off_in_page;
        unsigned long dst_end = dst_offset + len - 1;
        unsigned long src_end = src_offset + len - 1;
-       unsigned long dst_i;
-       unsigned long src_i;
 
        if (check_eb_range(dst, dst_offset, len) ||
            check_eb_range(dst, src_offset, len))
                return;
+
        if (dst_offset < src_offset) {
                memcpy_extent_buffer(dst, dst_offset, src_offset, len);
                return;
        }
+
        while (len > 0) {
-               dst_i = get_eb_page_index(dst_end);
+               unsigned long src_i;
+               size_t cur;
+               size_t dst_off_in_page;
+               size_t src_off_in_page;
+               void *src_addr;
+               bool use_memmove;
+
                src_i = get_eb_page_index(src_end);
 
                dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
@@ -4553,9 +4446,14 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
 
                cur = min_t(unsigned long, len, src_off_in_page + 1);
                cur = min(cur, dst_off_in_page + 1);
-               copy_pages(dst->pages[dst_i], dst->pages[src_i],
-                          dst_off_in_page - cur + 1,
-                          src_off_in_page - cur + 1, cur);
+
+               src_addr = page_address(dst->pages[src_i]) + src_off_in_page -
+                                       cur + 1;
+               use_memmove = areas_overlap(src_end - cur + 1, dst_end - cur + 1,
+                                           cur);
+
+               __write_extent_buffer(dst, src_addr, dst_end - cur + 1, cur,
+                                     use_memmove);
 
                dst_end -= cur;
                src_end -= cur;
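
When dst > src, memmove_extent_buffer() copies chunks from the *end* of both ranges so overlapping source bytes are consumed before they are overwritten, clamping each chunk so neither end crosses its page start. A compact sketch of that backwards walk, assuming an 8-byte "page" purely to force multiple iterations:

#include <assert.h>
#include <stdio.h>
#include <string.h>

#define DEMO_PAGE 8UL

static void backwards_copy(char *base, unsigned long dst, unsigned long src,
			   unsigned long len)
{
	unsigned long dst_end = dst + len - 1;
	unsigned long src_end = src + len - 1;

	while (len > 0) {
		unsigned long cur = len;

		/* Clamp so neither chunk end crosses its page start. */
		if (cur > src_end % DEMO_PAGE + 1)
			cur = src_end % DEMO_PAGE + 1;
		if (cur > dst_end % DEMO_PAGE + 1)
			cur = dst_end % DEMO_PAGE + 1;

		memmove(base + dst_end - cur + 1, base + src_end - cur + 1, cur);
		dst_end -= cur;
		src_end -= cur;
		len -= cur;
	}
}

int main(void)
{
	char buf[24] = "0123456789abcdef";

	backwards_copy(buf, 4, 0, 12);    /* overlapping move, dst > src */
	assert(memcmp(buf + 4, "0123456789ab", 12) == 0);
	printf("backwards overlapping copy ok\n");
	return 0;
}
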
index c5fae3a..68368ba 100644 (file)
@@ -40,7 +40,6 @@ enum {
        ENUM_BIT(PAGE_START_WRITEBACK),
        ENUM_BIT(PAGE_END_WRITEBACK),
        ENUM_BIT(PAGE_SET_ORDERED),
-       ENUM_BIT(PAGE_LOCK),
 };
 
 /*
@@ -94,6 +93,13 @@ struct extent_buffer {
 #endif
 };
 
+struct btrfs_eb_write_context {
+       struct writeback_control *wbc;
+       struct extent_buffer *eb;
+       /* Block group @eb resides in. Only used for zoned mode. */
+       struct btrfs_block_group *zoned_bg;
+};
+
 /*
  * Get the correct offset inside the page of extent buffer.
  *
@@ -178,8 +184,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
 int try_release_extent_buffer(struct page *page);
 
 int btrfs_read_folio(struct file *file, struct folio *folio);
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
-                             struct writeback_control *wbc);
+void extent_write_locked_range(struct inode *inode, struct page *locked_page,
+                              u64 start, u64 end, struct writeback_control *wbc,
+                              bool pages_dirty);
 int extent_writepages(struct address_space *mapping,
                      struct writeback_control *wbc);
 int btree_write_cache_pages(struct address_space *mapping,
@@ -236,11 +243,24 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
 int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
                                       void __user *dst, unsigned long start,
                                       unsigned long len);
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
-               const void *src);
 void write_extent_buffer(const struct extent_buffer *eb, const void *src,
                         unsigned long start, unsigned long len);
+
+static inline void write_extent_buffer_chunk_tree_uuid(
+               const struct extent_buffer *eb, const void *chunk_tree_uuid)
+{
+       write_extent_buffer(eb, chunk_tree_uuid,
+                           offsetof(struct btrfs_header, chunk_tree_uuid),
+                           BTRFS_FSID_SIZE);
+}
+
+static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
+                                           const void *fsid)
+{
+       write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
+                           BTRFS_FSID_SIZE);
+}
+
 void copy_extent_buffer_full(const struct extent_buffer *dst,
                             const struct extent_buffer *src);
 void copy_extent_buffer(const struct extent_buffer *dst,
@@ -266,7 +286,6 @@ void set_extent_buffer_dirty(struct extent_buffer *eb);
 void set_extent_buffer_uptodate(struct extent_buffer *eb);
 void clear_extent_buffer_uptodate(struct extent_buffer *eb);
 void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
-void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
 void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
                                  struct page *locked_page,
                                  u32 bits_to_clear, unsigned long page_ops);
@@ -277,8 +296,6 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
 
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
 
-void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
 bool find_lock_delalloc_range(struct inode *inode,
                             struct page *locked_page, u64 *start,
index 0cdb3e8..a6d8368 100644 (file)
@@ -760,8 +760,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
 
                if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
                        start = em_end;
-                       if (end != (u64)-1)
-                               len = start + len - em_end;
                        goto next;
                }
 
@@ -829,8 +827,8 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
                                if (!split)
                                        goto remove_em;
                        }
-                       split->start = start + len;
-                       split->len = em_end - (start + len);
+                       split->start = end;
+                       split->len = em_end - end;
                        split->block_start = em->block_start;
                        split->flags = flags;
                        split->compress_type = em->compress_type;
index 696bf69..1ce5dd1 100644 (file)
@@ -597,29 +597,37 @@ fail:
  * Each bit represents a sector. Thus caller should ensure @csum_buf passed
  * in is large enough to contain all csums.
  */
-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
-                             u8 *csum_buf, unsigned long *csum_bitmap,
-                             bool search_commit)
+int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
+                             u64 start, u64 end, u8 *csum_buf,
+                             unsigned long *csum_bitmap)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct btrfs_key key;
-       struct btrfs_path *path;
        struct extent_buffer *leaf;
        struct btrfs_csum_item *item;
        const u64 orig_start = start;
+       bool free_path = false;
        int ret;
 
        ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
               IS_ALIGNED(end + 1, fs_info->sectorsize));
 
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
+       if (!path) {
+               path = btrfs_alloc_path();
+               if (!path)
+                       return -ENOMEM;
+               free_path = true;
+       }
 
-       if (search_commit) {
-               path->skip_locking = 1;
-               path->reada = READA_FORWARD;
-               path->search_commit_root = 1;
+       /* Check if we can reuse the previous path. */
+       if (path->nodes[0]) {
+               btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+               if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
+                   key.type == BTRFS_EXTENT_CSUM_KEY &&
+                   key.offset <= start)
+                       goto search_forward;
+               btrfs_release_path(path);
        }
 
        key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -656,6 +664,7 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
                }
        }
 
+search_forward:
        while (start <= end) {
                u64 csum_end;
 
@@ -712,7 +721,8 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
        }
        ret = 0;
 fail:
-       btrfs_free_path(path);
+       if (free_path)
+               btrfs_free_path(path);
        return ret;
 }
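
The new btrfs_lookup_csums_bitmap() signature lets a batch caller pass in a long-lived path so consecutive lookups can resume from the previous leaf. The ownership rule it follows, allocate only when the caller passed NULL and free only what was allocated locally, in a compact sketch; lookup_ctx and do_lookup() are hypothetical stand-ins, not btrfs APIs.

#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>

struct lookup_ctx { int cached_pos; };           /* hypothetical context */

static int do_lookup(struct lookup_ctx *ctx)
{
	ctx->cached_pos++;                       /* state a reused ctx keeps */
	return 0;
}

static int lookup(struct lookup_ctx *ctx)
{
	bool free_ctx = false;
	int ret;

	if (!ctx) {                              /* one-shot caller: allocate locally */
		ctx = calloc(1, sizeof(*ctx));
		if (!ctx)
			return -ENOMEM;
		free_ctx = true;
	}

	ret = do_lookup(ctx);

	if (free_ctx)                            /* free only what we allocated */
		free(ctx);
	return ret;
}

int main(void)
{
	struct lookup_ctx ctx = { 0 };

	lookup(&ctx);                            /* batch caller reuses ctx ...  */
	lookup(&ctx);                            /* ... so cached state carries over */
	lookup(NULL);                            /* independent one-shot call */
	return 0;
}
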
 
index 4ec669b..04bd2d3 100644 (file)
@@ -57,9 +57,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
 int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
                            struct list_head *list, int search_commit,
                            bool nowait);
-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
-                             u8 *csum_buf, unsigned long *csum_bitmap,
-                             bool search_commit);
+int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
+                             u64 start, u64 end, u8 *csum_buf,
+                             unsigned long *csum_bitmap);
 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
                                     const struct btrfs_path *path,
                                     struct btrfs_file_extent_item *fi,
index fd03e68..ca46a52 100644 (file)
@@ -876,9 +876,9 @@ static int prepare_uptodate_page(struct inode *inode,
        return 0;
 }
 
-static unsigned int get_prepare_fgp_flags(bool nowait)
+static fgf_t get_prepare_fgp_flags(bool nowait)
 {
-       unsigned int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
+       fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
 
        if (nowait)
                fgp_flags |= FGP_NOWAIT;
@@ -910,7 +910,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
        int i;
        unsigned long index = pos >> PAGE_SHIFT;
        gfp_t mask = get_prepare_gfp_flags(inode, nowait);
-       unsigned int fgp_flags = get_prepare_fgp_flags(nowait);
+       fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
        int err = 0;
        int faili;
 
@@ -1106,24 +1106,6 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
        btrfs_drew_write_unlock(&inode->root->snapshot_lock);
 }
 
-static void update_time_for_write(struct inode *inode)
-{
-       struct timespec64 now;
-
-       if (IS_NOCMTIME(inode))
-               return;
-
-       now = current_time(inode);
-       if (!timespec64_equal(&inode->i_mtime, &now))
-               inode->i_mtime = now;
-
-       if (!timespec64_equal(&inode->i_ctime, &now))
-               inode->i_ctime = now;
-
-       if (IS_I_VERSION(inode))
-               inode_inc_iversion(inode);
-}
-
 static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
                             size_t count)
 {
@@ -1155,7 +1137,10 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
         * need to start yet another transaction to update the inode as we will
         * update the inode when we finish writing whatever data we write.
         */
-       update_time_for_write(inode);
+       if (!IS_NOCMTIME(inode)) {
+               inode->i_mtime = inode_set_ctime_current(inode);
+               inode_inc_iversion(inode);
+       }
 
        start_pos = round_down(pos, fs_info->sectorsize);
        oldsize = i_size_read(inode);
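
The timestamp updates in this file move to the newer VFS helper shape, in which inode_set_ctime_current() both stamps ctime and returns the value it used, so mtime can be assigned the identical timestamp in one expression. A userspace sketch of that shape; demo_inode and the helper are local stand-ins, not the VFS definitions.

#include <stdio.h>
#include <time.h>

struct demo_inode { struct timespec i_ctime, i_mtime; };

static struct timespec demo_set_ctime_current(struct demo_inode *inode)
{
	clock_gettime(CLOCK_REALTIME, &inode->i_ctime);
	return inode->i_ctime;            /* callers may reuse the value */
}

int main(void)
{
	struct demo_inode inode;

	/* One call stamps ctime and yields the same value for mtime. */
	inode.i_mtime = demo_set_ctime_current(&inode);
	printf("mtime == ctime: %d\n",
	       inode.i_mtime.tv_sec == inode.i_ctime.tv_sec &&
	       inode.i_mtime.tv_nsec == inode.i_ctime.tv_nsec);
	return 0;
}
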
@@ -2459,10 +2444,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
                 */
                inode_inc_iversion(&inode->vfs_inode);
 
-               if (!extent_info || extent_info->update_times) {
-                       inode->vfs_inode.i_mtime = current_time(&inode->vfs_inode);
-                       inode->vfs_inode.i_ctime = inode->vfs_inode.i_mtime;
-               }
+               if (!extent_info || extent_info->update_times)
+                       inode->vfs_inode.i_mtime = inode_set_ctime_current(&inode->vfs_inode);
 
                ret = btrfs_update_inode(trans, root, inode);
                if (ret)
@@ -2703,8 +2686,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
 
        ASSERT(trans != NULL);
        inode_inc_iversion(inode);
-       inode->i_mtime = current_time(inode);
-       inode->i_ctime = inode->i_mtime;
+       inode->i_mtime = inode_set_ctime_current(inode);
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        updated_inode = true;
        btrfs_end_transaction(trans);
@@ -2721,11 +2703,10 @@ out_only_mutex:
                 * for detecting, at fsync time, if the inode isn't yet in the
                 * log tree or it's there but not up to date.
                 */
-               struct timespec64 now = current_time(inode);
+               struct timespec64 now = inode_set_ctime_current(inode);
 
                inode_inc_iversion(inode);
                inode->i_mtime = now;
-               inode->i_ctime = now;
                trans = btrfs_start_transaction(root, 1);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
@@ -2796,7 +2777,7 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        i_size_write(inode, end);
        btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
@@ -3018,7 +2999,7 @@ static long btrfs_fallocate(struct file *file, int mode,
        struct extent_changeset *data_reserved = NULL;
        struct falloc_range *range;
        struct falloc_range *tmp;
-       struct list_head reserve_list;
+       LIST_HEAD(reserve_list);
        u64 cur_offset;
        u64 last_byte;
        u64 alloc_start;
@@ -3110,7 +3091,6 @@ static long btrfs_fallocate(struct file *file, int mode,
        btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);
 
        /* First, check if we exceed the qgroup limit */
-       INIT_LIST_HEAD(&reserve_list);
        while (cur_offset < alloc_end) {
                em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
                                      alloc_end - cur_offset);
index 8808004..27fad70 100644 (file)
@@ -1219,10 +1219,9 @@ static noinline_for_stack int write_pinned_extent_entries(
        start = block_group->start;
 
        while (start < block_group->start + block_group->length) {
-               ret = find_first_extent_bit(unpin, start,
-                                           &extent_start, &extent_end,
-                                           EXTENT_DIRTY, NULL);
-               if (ret)
+               if (!find_first_extent_bit(unpin, start,
+                                          &extent_start, &extent_end,
+                                          EXTENT_DIRTY, NULL))
                        return 0;
 
                /* This pinned extent is out of our range */
@@ -2705,13 +2704,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
                bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
 
        spin_lock(&ctl->tree_lock);
-       /* Count initial region as zone_unusable until it gets activated. */
        if (!used)
                to_free = size;
-       else if (initial &&
-                test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
-                (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
-               to_free = 0;
        else if (initial)
                to_free = block_group->zone_capacity;
        else if (offset >= block_group->alloc_offset)
@@ -2739,8 +2733,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
        reclaimable_unusable = block_group->zone_unusable -
                               (block_group->length - block_group->zone_capacity);
        /* All the region is now unusable. Mark it as unused and reclaim */
-       if (block_group->zone_unusable == block_group->length &&
-           block_group->alloc_offset) {
+       if (block_group->zone_unusable == block_group->length) {
                btrfs_mark_bg_unused(block_group);
        } else if (bg_reclaim_threshold &&
                   reclaimable_unusable >=
@@ -2944,7 +2937,8 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
        btrfs_info(fs_info, "block group has cluster?: %s",
               list_empty(&block_group->cluster_list) ? "no" : "yes");
        btrfs_info(fs_info,
-                  "%d blocks of free space at or bigger than bytes is", count);
+                  "%d free space entries at or bigger than %llu bytes",
+                  count, bytes);
 }
 
 void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
index 045ddce..c0e7340 100644 (file)
@@ -1515,9 +1515,15 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
                        if (prev_bit == 0 && bit == 1) {
                                extent_start = offset;
                        } else if (prev_bit == 1 && bit == 0) {
-                               total_found += add_new_free_space(block_group,
-                                                                 extent_start,
-                                                                 offset);
+                               u64 space_added;
+
+                               ret = btrfs_add_new_free_space(block_group,
+                                                              extent_start,
+                                                              offset,
+                                                              &space_added);
+                               if (ret)
+                                       goto out;
+                               total_found += space_added;
                                if (total_found > CACHING_CTL_WAKE_UP) {
                                        total_found = 0;
                                        wake_up(&caching_ctl->wait);
@@ -1529,8 +1535,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
                }
        }
        if (prev_bit == 1) {
-               total_found += add_new_free_space(block_group, extent_start,
-                                                 end);
+               ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
+               if (ret)
+                       goto out;
                extent_count++;
        }
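
load_free_space_bitmaps() extracts extents from the on-disk bitmap by watching bit transitions: a 0->1 edge opens a free extent, a 1->0 edge closes it, and a trailing run of ones is flushed after the loop, exactly as in the hunk above. A runnable miniature of the scan; the bitmap contents and add_free_extent() are invented.

#include <stdio.h>

static void add_free_extent(unsigned long start, unsigned long end)
{
	printf("free extent [%lu, %lu)\n", start, end);
}

int main(void)
{
	const int bits[] = { 0, 1, 1, 0, 1, 1, 1 };
	const unsigned long n = sizeof(bits) / sizeof(bits[0]);
	unsigned long extent_start = 0;
	int prev_bit = 0;

	for (unsigned long off = 0; off < n; off++) {
		int bit = bits[off];

		if (prev_bit == 0 && bit == 1)
			extent_start = off;                 /* extent opens */
		else if (prev_bit == 1 && bit == 0)
			add_free_extent(extent_start, off); /* extent closes */
		prev_bit = bit;
	}
	if (prev_bit == 1)                                  /* flush trailing run */
		add_free_extent(extent_start, n);
	return 0;
}
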
 
@@ -1569,6 +1576,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
        end = block_group->start + block_group->length;
 
        while (1) {
+               u64 space_added;
+
                ret = btrfs_next_item(root, path);
                if (ret < 0)
                        goto out;
@@ -1583,8 +1592,12 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
                ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
                ASSERT(key.objectid < end && key.objectid + key.offset <= end);
 
-               total_found += add_new_free_space(block_group, key.objectid,
-                                                 key.objectid + key.offset);
+               ret = btrfs_add_new_free_space(block_group, key.objectid,
+                                              key.objectid + key.offset,
+                                              &space_added);
+               if (ret)
+                       goto out;
+               total_found += space_added;
                if (total_found > CACHING_CTL_WAKE_UP) {
                        total_found = 0;
                        wake_up(&caching_ctl->wait);
index 203d2a2..a523d64 100644 (file)
@@ -46,8 +46,6 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
  * Runtime (in-memory) states of filesystem
  */
 enum {
-       /* Global indicator of serious filesystem errors */
-       BTRFS_FS_STATE_ERROR,
        /*
         * Filesystem is being remounted, allow to skip some operations, like
         * defrag
@@ -686,6 +684,12 @@ struct btrfs_fs_info {
        bool qgroup_rescan_running;
        u8 qgroup_drop_subtree_thres;
 
+       /*
+        * If this is not 0, a serious filesystem error has happened and this
+        * field contains that error (a negative errno value).
+        */
+       int fs_error;
+
        /* Filesystem state */
        unsigned long fs_state;
 
@@ -766,6 +770,9 @@ struct btrfs_fs_info {
        u64 data_reloc_bg;
        struct mutex zoned_data_reloc_io_lock;
 
+       struct btrfs_block_group *active_meta_bg;
+       struct btrfs_block_group *active_system_bg;
+
        u64 nr_global_roots;
 
        spinlock_t zone_active_bgs_lock;
@@ -962,8 +969,8 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
        clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
 }
 
-#define BTRFS_FS_ERROR(fs_info)        (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
-                                                  &(fs_info)->fs_state)))
+#define BTRFS_FS_ERROR(fs_info)        (READ_ONCE((fs_info)->fs_error))
+
 #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info)                            \
        (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR,            \
                           &(fs_info)->fs_state)))
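
BTRFS_FS_ERROR() now reads an int recording *which* error took the filesystem down, instead of testing a single state bit. Below is a userspace sketch of that shape, using C11 atomics where the kernel uses READ_ONCE()/WRITE_ONCE(); the keep-the-first-error policy is a plausible choice for the demo, not necessarily what btrfs itself does.

#include <stdatomic.h>
#include <stdio.h>

struct demo_fs_info {
	atomic_int fs_error;              /* 0 = healthy, else negative errno */
};

#define DEMO_FS_ERROR(fs) \
	atomic_load_explicit(&(fs)->fs_error, memory_order_relaxed)

static void record_error(struct demo_fs_info *fs, int error)
{
	int expected = 0;

	/* Demo policy: keep the first recorded error, ignore later ones. */
	atomic_compare_exchange_strong(&fs->fs_error, &expected, error);
}

int main(void)
{
	struct demo_fs_info fs = { .fs_error = 0 };

	record_error(&fs, -5);            /* -EIO */
	record_error(&fs, -28);           /* -ENOSPC: first error wins here */
	if (DEMO_FS_ERROR(&fs))
		printf("fs error: %d\n", DEMO_FS_ERROR(&fs));
	return 0;
}
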
index dbbb672..f09fbdc 100644 (file)
@@ -124,11 +124,11 @@ static struct kmem_cache *btrfs_inode_cachep;
 
 static int btrfs_setsize(struct inode *inode, struct iattr *attr);
 static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
-static noinline int cow_file_range(struct btrfs_inode *inode,
-                                  struct page *locked_page,
-                                  u64 start, u64 end, int *page_started,
-                                  unsigned long *nr_written, int unlock,
-                                  u64 *done_offset);
+
+static noinline int run_delalloc_cow(struct btrfs_inode *inode,
+                                    struct page *locked_page, u64 start,
+                                    u64 end, struct writeback_control *wbc,
+                                    bool pages_dirty);
 static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
                                       u64 len, u64 orig_start, u64 block_start,
                                       u64 block_len, u64 orig_block_len,
@@ -423,11 +423,10 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
 
        while (index <= end_index) {
                /*
-                * For locked page, we will call end_extent_writepage() on it
-                * in run_delalloc_range() for the error handling.  That
-                * end_extent_writepage() function will call
-                * btrfs_mark_ordered_io_finished() to clear page Ordered and
-                * run the ordered extent accounting.
+                * For the locked page, we will call
+                * btrfs_mark_ordered_io_finished() on it in
+                * run_delalloc_range() for the error handling, which will
+                * clear page Ordered and run the ordered extent accounting.
                 *
                 * Here we can't just clear the Ordered bit, or
                 * btrfs_mark_ordered_io_finished() would skip the accounting
@@ -815,24 +814,22 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
 }
 
 /*
- * we create compressed extents in two phases.  The first
- * phase compresses a range of pages that have already been
- * locked (both pages and state bits are locked).
+ * Work queue callback to start compression on a file's pages.
  *
- * This is done inside an ordered work queue, and the compression
- * is spread across many cpus.  The actual IO submission is step
- * two, and the ordered work queue takes care of making sure that
- * happens in the same order things were put onto the queue by
- * writepages and friends.
+ * This is done inside an ordered work queue, and the compression is spread
+ * across many cpus.  The actual IO submission is step two, and the ordered work
+ * queue takes care of making sure that happens in the same order things were
+ * put onto the queue by writepages and friends.
  *
- * If this code finds it can't get good compression, it puts an
- * entry onto the work queue to write the uncompressed bytes.  This
- * makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that the flusher thread sent them
- * down.
+ * If this code finds it can't get good compression, it puts an entry onto the
+ * work queue to write the uncompressed bytes.  This makes sure that both
+ * compressed inodes and uncompressed inodes are written in the same order that
+ * the flusher thread sent them down.
  */
-static noinline int compress_file_range(struct async_chunk *async_chunk)
+static void compress_file_range(struct btrfs_work *work)
 {
+       struct async_chunk *async_chunk =
+               container_of(work, struct async_chunk, work);
        struct btrfs_inode *inode = async_chunk->inode;
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct address_space *mapping = inode->vfs_inode.i_mapping;
@@ -842,19 +839,24 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
        u64 actual_end;
        u64 i_size;
        int ret = 0;
-       struct page **pages = NULL;
+       struct page **pages;
        unsigned long nr_pages;
        unsigned long total_compressed = 0;
        unsigned long total_in = 0;
+       unsigned int poff;
        int i;
-       int will_compress;
        int compress_type = fs_info->compress_type;
-       int compressed_extents = 0;
-       int redirty = 0;
 
        inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
 
        /*
+        * We need to call clear_page_dirty_for_io on each page in the range.
+        * Otherwise applications with the file mmap'd can wander in and change
+        * the page contents while we are compressing them.
+        */
+       extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
+
+       /*
         * We need to save i_size before now because it could change in between
         * us evaluating the size and assigning it.  This is because we lock and
         * unlock the page in truncate and fallocate, and then modify the i_size
@@ -868,7 +870,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
        barrier();
        actual_end = min_t(u64, i_size, end + 1);
 again:
-       will_compress = 0;
+       pages = NULL;
        nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
        nr_pages = min_t(unsigned long, nr_pages, BTRFS_MAX_COMPRESSED_PAGES);
 
@@ -912,78 +914,57 @@ again:
        ret = 0;
 
        /*
-        * we do compression for mount -o compress and when the
-        * inode has not been flagged as nocompress.  This flag can
-        * change at any time if we discover bad compression ratios.
+        * We do compression for mount -o compress and when the inode has not
+        * been flagged as NOCOMPRESS.  This flag can change at any time if we
+        * discover bad compression ratios.
         */
-       if (inode_need_compress(inode, start, end)) {
-               WARN_ON(pages);
-               pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
-               if (!pages) {
-                       /* just bail out to the uncompressed code */
-                       nr_pages = 0;
-                       goto cont;
-               }
-
-               if (inode->defrag_compress)
-                       compress_type = inode->defrag_compress;
-               else if (inode->prop_compress)
-                       compress_type = inode->prop_compress;
+       if (!inode_need_compress(inode, start, end))
+               goto cleanup_and_bail_uncompressed;
 
+       pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+       if (!pages) {
                /*
-                * we need to call clear_page_dirty_for_io on each
-                * page in the range.  Otherwise applications with the file
-                * mmap'd can wander in and change the page contents while
-                * we are compressing them.
-                *
-                * If the compression fails for any reason, we set the pages
-                * dirty again later on.
-                *
-                * Note that the remaining part is redirtied, the start pointer
-                * has moved, the end is the original one.
+                * Memory allocation failure is not fatal here; we can fall
+                * back to the uncompressed path.
                 */
-               if (!redirty) {
-                       extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
-                       redirty = 1;
-               }
+               goto cleanup_and_bail_uncompressed;
+       }
 
-               /* Compression level is applied here and only here */
-               ret = btrfs_compress_pages(
-                       compress_type | (fs_info->compress_level << 4),
-                                          mapping, start,
-                                          pages,
-                                          &nr_pages,
-                                          &total_in,
-                                          &total_compressed);
+       if (inode->defrag_compress)
+               compress_type = inode->defrag_compress;
+       else if (inode->prop_compress)
+               compress_type = inode->prop_compress;
+
+       /* Compression level is applied here. */
+       ret = btrfs_compress_pages(compress_type | (fs_info->compress_level << 4),
+                                  mapping, start, pages, &nr_pages, &total_in,
+                                  &total_compressed);
+       if (ret)
+               goto mark_incompressible;
 
-               if (!ret) {
-                       unsigned long offset = offset_in_page(total_compressed);
-                       struct page *page = pages[nr_pages - 1];
+       /*
+        * Zero the tail end of the last page, as we might be sending it down
+        * to disk.
+        */
+       poff = offset_in_page(total_compressed);
+       if (poff)
+               memzero_page(pages[nr_pages - 1], poff, PAGE_SIZE - poff);
 
-                       /* zero the tail end of the last page, we might be
-                        * sending it down to disk
-                        */
-                       if (offset)
-                               memzero_page(page, offset, PAGE_SIZE - offset);
-                       will_compress = 1;
-               }
-       }
-cont:
        /*
+        * Try to create an inline extent.
+        *
+        * If we didn't compress the entire range, try to create an uncompressed
+        * inline extent, else a compressed one.
+        *
         * Check cow_file_range() for why we don't even try to create inline
-        * extent for subpage case.
+        * extent for the subpage case.
         */
        if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
-               /* lets try to make an inline extent */
-               if (ret || total_in < actual_end) {
-                       /* we didn't compress the entire range, try
-                        * to make an uncompressed inline extent.
-                        */
-                       ret = cow_file_range_inline(inode, actual_end,
-                                                   0, BTRFS_COMPRESS_NONE,
-                                                   NULL, false);
+               if (total_in < actual_end) {
+                       ret = cow_file_range_inline(inode, actual_end, 0,
+                                                   BTRFS_COMPRESS_NONE, NULL,
+                                                   false);
                } else {
-                       /* try making a compressed inline extent */
                        ret = cow_file_range_inline(inode, actual_end,
                                                    total_compressed,
                                                    compress_type, pages,
@@ -1013,99 +994,52 @@ cont:
                                                     PAGE_UNLOCK |
                                                     PAGE_START_WRITEBACK |
                                                     PAGE_END_WRITEBACK);
-
-                       /*
-                        * Ensure we only free the compressed pages if we have
-                        * them allocated, as we can still reach here with
-                        * inode_need_compress() == false.
-                        */
-                       if (pages) {
-                               for (i = 0; i < nr_pages; i++) {
-                                       WARN_ON(pages[i]->mapping);
-                                       put_page(pages[i]);
-                               }
-                               kfree(pages);
-                       }
-                       return 0;
+                       goto free_pages;
                }
        }
 
-       if (will_compress) {
-               /*
-                * we aren't doing an inline extent round the compressed size
-                * up to a block size boundary so the allocator does sane
-                * things
-                */
-               total_compressed = ALIGN(total_compressed, blocksize);
+       /*
+        * We aren't doing an inline extent. Round the compressed size up to a
+        * block size boundary so the allocator does sane things.
+        */
+       total_compressed = ALIGN(total_compressed, blocksize);
 
-               /*
-                * one last check to make sure the compression is really a
-                * win, compare the page count read with the blocks on disk,
-                * compression must free at least one sector size
-                */
-               total_in = round_up(total_in, fs_info->sectorsize);
-               if (total_compressed + blocksize <= total_in) {
-                       compressed_extents++;
+       /*
+        * One last check to make sure the compression is really a win: compare
+        * the amount of data read with the blocks needed on disk; compression
+        * must free at least one sector.
+        */
+       total_in = round_up(total_in, fs_info->sectorsize);
+       if (total_compressed + blocksize > total_in)
+               goto mark_incompressible;
 
-                       /*
-                        * The async work queues will take care of doing actual
-                        * allocation on disk for these compressed pages, and
-                        * will submit them to the elevator.
-                        */
-                       add_async_extent(async_chunk, start, total_in,
-                                       total_compressed, pages, nr_pages,
-                                       compress_type);
-
-                       if (start + total_in < end) {
-                               start += total_in;
-                               pages = NULL;
-                               cond_resched();
-                               goto again;
-                       }
-                       return compressed_extents;
-               }
+       /*
+        * The async work queues will take care of doing actual allocation on
+        * disk for these compressed pages, and will submit the bios.
+        */
+       add_async_extent(async_chunk, start, total_in, total_compressed, pages,
+                        nr_pages, compress_type);
+       if (start + total_in < end) {
+               start += total_in;
+               cond_resched();
+               goto again;
        }
+       return;
+
+mark_incompressible:
+       if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
+               inode->flags |= BTRFS_INODE_NOCOMPRESS;
+cleanup_and_bail_uncompressed:
+       add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
+                        BTRFS_COMPRESS_NONE);
+free_pages:
        if (pages) {
-               /*
-                * the compression code ran but failed to make things smaller,
-                * free any pages it allocated and our page pointer array
-                */
                for (i = 0; i < nr_pages; i++) {
                        WARN_ON(pages[i]->mapping);
                        put_page(pages[i]);
                }
                kfree(pages);
-               pages = NULL;
-               total_compressed = 0;
-               nr_pages = 0;
-
-               /* flag the file so we don't compress in the future */
-               if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
-                   !(inode->prop_compress)) {
-                       inode->flags |= BTRFS_INODE_NOCOMPRESS;
-               }
-       }
-cleanup_and_bail_uncompressed:
-       /*
-        * No compression, but we still need to write the pages in the file
-        * we've been given so far.  redirty the locked page if it corresponds
-        * to our extent and set things up for the async work queue to run
-        * cow_file_range to do the normal delalloc dance.
-        */
-       if (async_chunk->locked_page &&
-           (page_offset(async_chunk->locked_page) >= start &&
-            page_offset(async_chunk->locked_page)) <= end) {
-               __set_page_dirty_nobuffers(async_chunk->locked_page);
-               /* unlocked later on in the async handlers */
        }
-
-       if (redirty)
-               extent_range_redirty_for_io(&inode->vfs_inode, start, end);
-       add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
-                        BTRFS_COMPRESS_NONE);
-       compressed_extents++;
-
-       return compressed_extents;
 }
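
compress_file_range() is now the workqueue callback itself, so it recovers its async_chunk from the embedded work item via container_of(). The pattern in miniature, with deliberately simplified structures that are not the btrfs definitions:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { void (*fn)(struct work *w); };

struct async_chunk_demo {
	unsigned long start;
	unsigned long end;
	struct work work;                     /* embedded, not a pointer */
};

static void compress_cb(struct work *w)
{
	struct async_chunk_demo *chunk =
		container_of(w, struct async_chunk_demo, work);

	printf("compressing [%lu, %lu]\n", chunk->start, chunk->end);
}

int main(void)
{
	struct async_chunk_demo chunk = {
		.start = 0, .end = 4095, .work = { .fn = compress_cb },
	};

	chunk.work.fn(&chunk.work);           /* what the workqueue would do */
	return 0;
}
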
 
 static void free_async_extent_pages(struct async_extent *async_extent)
@@ -1124,14 +1058,12 @@ static void free_async_extent_pages(struct async_extent *async_extent)
        async_extent->pages = NULL;
 }
 
-static int submit_uncompressed_range(struct btrfs_inode *inode,
-                                    struct async_extent *async_extent,
-                                    struct page *locked_page)
+static void submit_uncompressed_range(struct btrfs_inode *inode,
+                                     struct async_extent *async_extent,
+                                     struct page *locked_page)
 {
        u64 start = async_extent->start;
        u64 end = async_extent->start + async_extent->ram_size - 1;
-       unsigned long nr_written = 0;
-       int page_started = 0;
        int ret;
        struct writeback_control wbc = {
                .sync_mode              = WB_SYNC_ALL,
@@ -1140,45 +1072,33 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
                .no_cgroup_owner        = 1,
        };
 
-       /*
-        * Call cow_file_range() to run the delalloc range directly, since we
-        * won't go to NOCOW or async path again.
-        *
-        * Also we call cow_file_range() with @unlock_page == 0, so that we
-        * can directly submit them without interruption.
-        */
-       ret = cow_file_range(inode, locked_page, start, end, &page_started,
-                            &nr_written, 0, NULL);
-       /* Inline extent inserted, page gets unlocked and everything is done */
-       if (page_started)
-               return 0;
-
+       wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
+       ret = run_delalloc_cow(inode, locked_page, start, end, &wbc, false);
+       wbc_detach_inode(&wbc);
        if (ret < 0) {
                btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
                if (locked_page) {
                        const u64 page_start = page_offset(locked_page);
-                       const u64 page_end = page_start + PAGE_SIZE - 1;
 
                        set_page_writeback(locked_page);
                        end_page_writeback(locked_page);
-                       end_extent_writepage(locked_page, ret, page_start, page_end);
+                       btrfs_mark_ordered_io_finished(inode, locked_page,
+                                                      page_start, PAGE_SIZE,
+                                                      !ret);
+                       btrfs_page_clear_uptodate(inode->root->fs_info,
+                                                 locked_page, page_start,
+                                                 PAGE_SIZE);
+                       mapping_set_error(locked_page->mapping, ret);
                        unlock_page(locked_page);
                }
-               return ret;
        }
-
-       /* All pages will be unlocked, including @locked_page */
-       wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
-       ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
-       wbc_detach_inode(&wbc);
-       return ret;
 }
 
-static int submit_one_async_extent(struct btrfs_inode *inode,
-                                  struct async_chunk *async_chunk,
-                                  struct async_extent *async_extent,
-                                  u64 *alloc_hint)
+static void submit_one_async_extent(struct async_chunk *async_chunk,
+                                   struct async_extent *async_extent,
+                                   u64 *alloc_hint)
 {
+       struct btrfs_inode *inode = async_chunk->inode;
        struct extent_io_tree *io_tree = &inode->io_tree;
        struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1206,9 +1126,8 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
        }
        lock_extent(io_tree, start, end, NULL);
 
-       /* We have fall back to uncompressed write */
-       if (!async_extent->pages) {
-               ret = submit_uncompressed_range(inode, async_extent, locked_page);
+       if (async_extent->compress_type == BTRFS_COMPRESS_NONE) {
+               submit_uncompressed_range(inode, async_extent, locked_page);
                goto done;
        }
 
@@ -1217,7 +1136,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
                                   async_extent->compressed_size,
                                   0, *alloc_hint, &ins, 1, 1);
        if (ret) {
-               free_async_extent_pages(async_extent);
                /*
                 * Here we used to try again by going back to non-compressed
                 * path for ENOSPC.  But we can't reserve space even for
@@ -1272,7 +1190,7 @@ done:
        if (async_chunk->blkcg_css)
                kthread_associate_blkcg(NULL);
        kfree(async_extent);
-       return ret;
+       return;
 
 out_free_reserve:
        btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1286,39 +1204,13 @@ out_free:
                                     PAGE_UNLOCK | PAGE_START_WRITEBACK |
                                     PAGE_END_WRITEBACK);
        free_async_extent_pages(async_extent);
-       goto done;
-}
-
-/*
- * Phase two of compressed writeback.  This is the ordered portion of the code,
- * which only gets called in the order the work was queued.  We walk all the
- * async extents created by compress_file_range and send them down to the disk.
- */
-static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
-{
-       struct btrfs_inode *inode = async_chunk->inode;
-       struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       struct async_extent *async_extent;
-       u64 alloc_hint = 0;
-       int ret = 0;
-
-       while (!list_empty(&async_chunk->extents)) {
-               u64 extent_start;
-               u64 ram_size;
-
-               async_extent = list_entry(async_chunk->extents.next,
-                                         struct async_extent, list);
-               list_del(&async_extent->list);
-               extent_start = async_extent->start;
-               ram_size = async_extent->ram_size;
-
-               ret = submit_one_async_extent(inode, async_chunk, async_extent,
-                                             &alloc_hint);
-               btrfs_debug(fs_info,
+       if (async_chunk->blkcg_css)
+               kthread_associate_blkcg(NULL);
+       btrfs_debug(fs_info,
 "async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
-                           inode->root->root_key.objectid,
-                           btrfs_ino(inode), extent_start, ram_size, ret);
-       }
+                   root->root_key.objectid, btrfs_ino(inode), start,
+                   async_extent->ram_size, ret);
+       kfree(async_extent);
 }
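
The hunk above turns submit_one_async_extent() into a void function: failure is handled in place by unwinding through goto labels, releasing resources in reverse order and logging, since the work-queue caller could not act on an error code anyway. A freestanding sketch of that shape, with purely illustrative names (none of these are btrfs APIs):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative only: a void submit helper that cleans up and logs on
 * failure instead of returning an error, mirroring the new shape of
 * submit_one_async_extent(). */
static void submit_one(void *payload, size_t size)
{
	void *reservation;
	void *buf;

	reservation = malloc(size);	/* stand-in for reserving an extent */
	if (!reservation)
		goto out_log;

	buf = malloc(size);		/* stand-in for setting up the I/O */
	if (!buf)
		goto out_free_reserve;

	/* ... submit the I/O here, then release everything ... */
	free(buf);
	free(reservation);
	free(payload);
	return;

out_free_reserve:
	free(reservation);
out_log:
	fprintf(stderr, "submission failed, size=%zu\n", size);
	free(payload);
}

int main(void)
{
	submit_one(malloc(16), 16);
	return 0;
}
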
 
 static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
@@ -1362,25 +1254,18 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
  * locked_page is the page that writepage had locked already.  We use
  * it to make sure we don't do extra locks or unlocks.
  *
- * *page_started is set to one if we unlock locked_page and do everything
- * required to start IO on it.  It may be clean and already done with
- * IO when we return.
- *
- * When unlock == 1, we unlock the pages in successfully allocated regions.
- * When unlock == 0, we leave them locked for writing them out.
+ * When this function fails, it unlocks all pages except @locked_page.
  *
- * However, we unlock all the pages except @locked_page in case of failure.
+ * When this function successfully creates an inline extent, it returns 1 and
+ * unlocks all pages including @locked_page and starts I/O on them.
+ * (In reality inline extents are limited to a single page, so @locked_page is
+ * the only page handled anyway.)
  *
- * In summary, page locking state will be as follow:
+ * When this function succeeds and creates a normal extent, the page locking
+ * status depends on the passed-in flags:
  *
- * - page_started == 1 (return value)
- *     - All the pages are unlocked. IO is started.
- *     - Note that this can happen only on success
- * - unlock == 1
- *     - All the pages except @locked_page are unlocked in any case
- * - unlock == 0
- *     - On success, all the pages are locked for writing out them
- *     - On failure, all the pages except @locked_page are unlocked
+ * - If @keep_locked is set, all pages are kept locked.
+ * - Else all pages except for @locked_page are unlocked.
  *
  * When a failure happens in the second or later iteration of the
  * while-loop, the ordered extents created in previous iterations are kept
@@ -1389,10 +1274,9 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
  * example.
  */
 static noinline int cow_file_range(struct btrfs_inode *inode,
-                                  struct page *locked_page,
-                                  u64 start, u64 end, int *page_started,
-                                  unsigned long *nr_written, int unlock,
-                                  u64 *done_offset)
+                                  struct page *locked_page, u64 start, u64 end,
+                                  u64 *done_offset,
+                                  bool keep_locked, bool no_inline)
 {
        struct btrfs_root *root = inode->root;
        struct btrfs_fs_info *fs_info = root->fs_info;
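
The rewritten comment spells out a three-way contract for cow_file_range(): negative on error, 1 when an inline extent was created and I/O already started, 0 for a normal extent whose page locking follows @keep_locked. A toy caller illustrating that dispatch, under those assumptions (stub names, not the btrfs API):

#include <stdio.h>

/* Stub standing in for cow_file_range(); returns <0, 0, or 1 per the
 * contract documented above. */
static int cow_range_stub(int want_inline)
{
	return want_inline ? 1 : 0;
}

static int run_range(int want_inline, int keep_locked)
{
	int ret = cow_range_stub(want_inline);

	if (ret < 0)
		return ret;	/* failure: all pages but locked_page unlocked */
	if (ret == 1) {
		/* inline extent: I/O already started, everything unlocked */
		printf("inline extent written, nothing left to do\n");
		return 0;
	}
	/* normal extent: locking follows the flag the caller passed in */
	printf(keep_locked ? "pages kept locked for writeout\n"
			   : "pages unlocked\n");
	return 0;
}

int main(void)
{
	run_range(1, 0);
	run_range(0, 1);
	return 0;
}
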
@@ -1431,7 +1315,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
         * This means we can trigger inline extent even if we didn't want to.
         * So here we skip inline extent creation completely.
         */
-       if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
+       if (start == 0 && fs_info->sectorsize == PAGE_SIZE && !no_inline) {
                u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
                                       end + 1);
 
@@ -1451,9 +1335,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                                     EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
                                     EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
                                     PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
-                       *nr_written = *nr_written +
-                            (end - start + PAGE_SIZE) / PAGE_SIZE;
-                       *page_started = 1;
                        /*
                         * locked_page is locked by the caller of
                         * writepage_delalloc(), not locked by
@@ -1463,11 +1344,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                         * as it doesn't have any subpage::writers recorded.
                         *
                         * Here we manually unlock the page, since the caller
-                        * can't use page_started to determine if it's an
-                        * inline extent or a compressed extent.
+                        * can't determine if it's an inline extent or a
+                        * compressed extent.
                         */
                        unlock_page(locked_page);
-                       goto out;
+                       ret = 1;
+                       goto done;
                } else if (ret < 0) {
                        goto out_unlock;
                }
@@ -1498,6 +1380,31 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
                                           min_alloc_size, 0, alloc_hint,
                                           &ins, 1, 1);
+               if (ret == -EAGAIN) {
+                       /*
+                        * btrfs_reserve_extent only returns -EAGAIN for zoned
+                        * file systems, which is an indication that there are
+                        * no active zones to allocate from at the moment.
+                        *
+                        * If this is the first loop iteration, wait for at
+                        * least one zone to finish before retrying the
+                        * allocation.  Otherwise ask the caller to write out
+                        * the already allocated blocks before coming back to
+                        * us, or return -ENOSPC if it can't handle retries.
+                        */
+                       ASSERT(btrfs_is_zoned(fs_info));
+                       if (start == orig_start) {
+                               wait_on_bit_io(&inode->root->fs_info->flags,
+                                              BTRFS_FS_NEED_ZONE_FINISH,
+                                              TASK_UNINTERRUPTIBLE);
+                               continue;
+                       }
+                       if (done_offset) {
+                               *done_offset = start - 1;
+                               return 0;
+                       }
+                       ret = -ENOSPC;
+               }
                if (ret < 0)
                        goto out_unlock;
                cur_alloc_size = ins.offset;
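
A minimal model of the -EAGAIN policy added above, assuming the allocator reports "no active zone" that way: wait and retry if nothing has been allocated yet, otherwise hand back partial progress via done_offset, or fall back to -ENOSPC when the caller cannot retry. Names and the block size are illustrative:

#include <errno.h>
#include <stdio.h>

/* Toy allocator: reports "no active zone" (-EAGAIN) on its first call
 * only, then succeeds. */
static int reserve_block(int *tries)
{
	return (*tries)++ == 0 ? -EAGAIN : 0;
}

static int alloc_range(unsigned long orig_start, unsigned long end,
		       unsigned long *done_offset)
{
	unsigned long start = orig_start;
	int tries = 0;

	while (start <= end) {
		int ret = reserve_block(&tries);

		if (ret == -EAGAIN) {
			if (start == orig_start)
				continue;	/* no progress yet: wait, retry */
			if (done_offset) {
				*done_offset = start - 1;
				return 0;	/* report partial progress */
			}
			return -ENOSPC;		/* caller cannot retry */
		}
		if (ret < 0)
			return ret;
		start += 4096;	/* pretend one block was allocated */
	}
	if (done_offset)
		*done_offset = end;
	return 0;
}

int main(void)
{
	unsigned long done = 0;

	printf("ret=%d done=%lu\n", alloc_range(0, 8191, &done), done);
	return 0;
}
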
@@ -1558,7 +1465,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                 * Do set the Ordered (Private2) bit so we know this page was
                 * properly setup for writepage.
                 */
-               page_ops = unlock ? PAGE_UNLOCK : 0;
+               page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
                page_ops |= PAGE_SET_ORDERED;
 
                extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
@@ -1581,7 +1488,9 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
                if (ret)
                        goto out_unlock;
        }
-out:
+done:
+       if (done_offset)
+               *done_offset = end;
        return ret;
 
 out_drop_extent_cache:
@@ -1591,21 +1500,6 @@ out_reserve:
        btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
 out_unlock:
        /*
-        * If done_offset is non-NULL and ret == -EAGAIN, we expect the
-        * caller to write out the successfully allocated region and retry.
-        */
-       if (done_offset && ret == -EAGAIN) {
-               if (orig_start < start)
-                       *done_offset = start - 1;
-               else
-                       *done_offset = start;
-               return ret;
-       } else if (ret == -EAGAIN) {
-               /* Convert to -ENOSPC since the caller cannot retry. */
-               ret = -ENOSPC;
-       }
-
-       /*
         * Now, we have three regions to clean up:
         *
         * |-------(1)----|---(2)---|-------------(3)----------|
@@ -1627,10 +1521,10 @@ out_unlock:
         * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
         * function.
         *
-        * However, in case of unlock == 0, we still need to unlock the pages
+        * However, in case of @keep_locked, we still need to unlock the pages
         * (except @locked_page) to ensure all the pages are unlocked.
         */
-       if (!unlock && orig_start < start) {
+       if (keep_locked && orig_start < start) {
                if (!locked_page)
                        mapping_set_error(inode->vfs_inode.i_mapping, ret);
                extent_clear_unlock_delalloc(inode, orig_start, start - 1,
@@ -1654,8 +1548,6 @@ out_unlock:
                                             clear_bits,
                                             page_ops);
                start += cur_alloc_size;
-               if (start >= end)
-                       return ret;
        }
 
        /*
@@ -1664,50 +1556,37 @@ out_unlock:
         * space_info's bytes_may_use counter, reserved in
         * btrfs_check_data_free_space().
         */
-       extent_clear_unlock_delalloc(inode, start, end, locked_page,
-                                    clear_bits | EXTENT_CLEAR_DATA_RESV,
-                                    page_ops);
-       return ret;
-}
-
-/*
- * work queue call back to started compression on a file and pages
- */
-static noinline void async_cow_start(struct btrfs_work *work)
-{
-       struct async_chunk *async_chunk;
-       int compressed_extents;
-
-       async_chunk = container_of(work, struct async_chunk, work);
-
-       compressed_extents = compress_file_range(async_chunk);
-       if (compressed_extents == 0) {
-               btrfs_add_delayed_iput(async_chunk->inode);
-               async_chunk->inode = NULL;
+       if (start < end) {
+               clear_bits |= EXTENT_CLEAR_DATA_RESV;
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            clear_bits, page_ops);
        }
+       return ret;
 }
 
 /*
- * work queue call back to submit previously compressed pages
+ * Phase two of compressed writeback.  This is the ordered portion of the code,
+ * which only gets called in the order the work was queued.  We walk all the
+ * async extents created by compress_file_range and send them down to the disk.
  */
-static noinline void async_cow_submit(struct btrfs_work *work)
+static noinline void submit_compressed_extents(struct btrfs_work *work)
 {
        struct async_chunk *async_chunk = container_of(work, struct async_chunk,
                                                     work);
        struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
+       struct async_extent *async_extent;
        unsigned long nr_pages;
+       u64 alloc_hint = 0;
 
        nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
                PAGE_SHIFT;
 
-       /*
-        * ->inode could be NULL if async_chunk_start has failed to compress,
-        * in which case we don't have anything to submit, yet we need to
-        * always adjust ->async_delalloc_pages as its paired with the init
-        * happening in run_delalloc_compressed
-        */
-       if (async_chunk->inode)
-               submit_compressed_extents(async_chunk);
+       while (!list_empty(&async_chunk->extents)) {
+               async_extent = list_entry(async_chunk->extents.next,
+                                         struct async_extent, list);
+               list_del(&async_extent->list);
+               submit_one_async_extent(async_chunk, async_extent, &alloc_hint);
+       }
 
        /* atomic_sub_return implies a barrier */
        if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
@@ -1721,8 +1600,7 @@ static noinline void async_cow_free(struct btrfs_work *work)
        struct async_cow *async_cow;
 
        async_chunk = container_of(work, struct async_chunk, work);
-       if (async_chunk->inode)
-               btrfs_add_delayed_iput(async_chunk->inode);
+       btrfs_add_delayed_iput(async_chunk->inode);
        if (async_chunk->blkcg_css)
                css_put(async_chunk->blkcg_css);
 
@@ -1732,10 +1610,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
 }
 
 static bool run_delalloc_compressed(struct btrfs_inode *inode,
-                                   struct writeback_control *wbc,
-                                   struct page *locked_page,
-                                   u64 start, u64 end, int *page_started,
-                                   unsigned long *nr_written)
+                                   struct page *locked_page, u64 start,
+                                   u64 end, struct writeback_control *wbc)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
@@ -1809,65 +1685,42 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
                        async_chunk[i].blkcg_css = NULL;
                }
 
-               btrfs_init_work(&async_chunk[i].work, async_cow_start,
-                               async_cow_submit, async_cow_free);
+               btrfs_init_work(&async_chunk[i].work, compress_file_range,
+                               submit_compressed_extents, async_cow_free);
 
                nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
                atomic_add(nr_pages, &fs_info->async_delalloc_pages);
 
                btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
 
-               *nr_written += nr_pages;
                start = cur_end + 1;
        }
-       *page_started = 1;
        return true;
 }
 
-static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
-                                      struct page *locked_page, u64 start,
-                                      u64 end, int *page_started,
-                                      unsigned long *nr_written,
-                                      struct writeback_control *wbc)
+/*
+ * Run the delalloc range from start to end, and write back any dirty pages
+ * covered by the range.
+ */
+static noinline int run_delalloc_cow(struct btrfs_inode *inode,
+                                    struct page *locked_page, u64 start,
+                                    u64 end, struct writeback_control *wbc,
+                                    bool pages_dirty)
 {
        u64 done_offset = end;
        int ret;
-       bool locked_page_done = false;
 
        while (start <= end) {
-               ret = cow_file_range(inode, locked_page, start, end, page_started,
-                                    nr_written, 0, &done_offset);
-               if (ret && ret != -EAGAIN)
+               ret = cow_file_range(inode, locked_page, start, end, &done_offset,
+                                    true, false);
+               if (ret)
                        return ret;
-
-               if (*page_started) {
-                       ASSERT(ret == 0);
-                       return 0;
-               }
-
-               if (ret == 0)
-                       done_offset = end;
-
-               if (done_offset == start) {
-                       wait_on_bit_io(&inode->root->fs_info->flags,
-                                      BTRFS_FS_NEED_ZONE_FINISH,
-                                      TASK_UNINTERRUPTIBLE);
-                       continue;
-               }
-
-               if (!locked_page_done) {
-                       __set_page_dirty_nobuffers(locked_page);
-                       account_page_redirty(locked_page);
-               }
-               locked_page_done = true;
-               extent_write_locked_range(&inode->vfs_inode, start, done_offset,
-                                         wbc);
+               extent_write_locked_range(&inode->vfs_inode, locked_page, start,
+                                         done_offset, wbc, pages_dirty);
                start = done_offset + 1;
        }
 
-       *page_started = 1;
-
-       return 0;
+       return 1;
 }
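
run_delalloc_cow() above reduces to "allocate as far as possible, write back that finished sub-range, resume after it". A standalone sketch of the loop with stand-in functions (hypothetical names):

#include <stdio.h>

/* Pretend allocator that can only cover 4 KiB per call; reports how far
 * it got via *done. */
static int cow_chunk(unsigned long start, unsigned long end,
		     unsigned long *done)
{
	unsigned long step = 4096;

	*done = (end - start + 1 > step) ? start + step - 1 : end;
	return 0;
}

static int run_cow(unsigned long start, unsigned long end)
{
	unsigned long done = end;

	while (start <= end) {
		int ret = cow_chunk(start, end, &done);

		if (ret)
			return ret;
		printf("writeback %lu..%lu\n", start, done);
		start = done + 1;	/* resume after the finished range */
	}
	return 1;	/* mirrors the "I/O started" return code above */
}

int main(void)
{
	return run_cow(0, 16383) == 1 ? 0 : 1;
}
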
 
 static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
@@ -1894,8 +1747,7 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
 }
 
 static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
-                          const u64 start, const u64 end,
-                          int *page_started, unsigned long *nr_written)
+                          const u64 start, const u64 end)
 {
        const bool is_space_ino = btrfs_is_free_space_inode(inode);
        const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
@@ -1903,6 +1755,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
        struct extent_io_tree *io_tree = &inode->io_tree;
        u64 range_start = start;
        u64 count;
+       int ret;
 
        /*
         * If EXTENT_NORESERVE is set it means that when the buffered write was
@@ -1955,8 +1808,14 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
                                         NULL);
        }
 
-       return cow_file_range(inode, locked_page, start, end, page_started,
-                             nr_written, 1, NULL);
+       /*
+        * Don't try to create inline extents, as mixing an inline extent
+        * (which is written out and unlocked directly) with normal NOCOW
+        * extents doesn't work.
+        */
+       ret = cow_file_range(inode, locked_page, start, end, NULL, false, true);
+       ASSERT(ret != 1);
+       return ret;
 }
 
 struct can_nocow_file_extent_args {
@@ -2105,9 +1964,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
  */
 static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
                                       struct page *locked_page,
-                                      const u64 start, const u64 end,
-                                      int *page_started,
-                                      unsigned long *nr_written)
+                                      const u64 start, const u64 end)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        struct btrfs_root *root = inode->root;
@@ -2117,25 +1974,26 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
        int ret;
        bool check_prev = true;
        u64 ino = btrfs_ino(inode);
-       struct btrfs_block_group *bg;
-       bool nocow = false;
        struct can_nocow_file_extent_args nocow_args = { 0 };
 
+       /*
+        * Normally on a zoned device we're only doing COW writes, but
+        * relocation on a zoned filesystem serializes I/O so that we're only
+        * writing sequentially and can end up here as well.
+        */
+       ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root));
+
        path = btrfs_alloc_path();
        if (!path) {
-               extent_clear_unlock_delalloc(inode, start, end, locked_page,
-                                            EXTENT_LOCKED | EXTENT_DELALLOC |
-                                            EXTENT_DO_ACCOUNTING |
-                                            EXTENT_DEFRAG, PAGE_UNLOCK |
-                                            PAGE_START_WRITEBACK |
-                                            PAGE_END_WRITEBACK);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto error;
        }
 
        nocow_args.end = end;
        nocow_args.writeback_path = true;
 
        while (1) {
+               struct btrfs_block_group *nocow_bg = NULL;
                struct btrfs_ordered_extent *ordered;
                struct btrfs_key found_key;
                struct btrfs_file_extent_item *fi;
@@ -2146,8 +2004,6 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
                int extent_type;
                bool is_prealloc;
 
-               nocow = false;
-
                ret = btrfs_lookup_file_extent(NULL, root, path, ino,
                                               cur_offset, 0);
                if (ret < 0)
@@ -2172,11 +2028,8 @@ next_slot:
                leaf = path->nodes[0];
                if (path->slots[0] >= btrfs_header_nritems(leaf)) {
                        ret = btrfs_next_leaf(root, path);
-                       if (ret < 0) {
-                               if (cow_start != (u64)-1)
-                                       cur_offset = cow_start;
+                       if (ret < 0)
                                goto error;
-                       }
                        if (ret > 0)
                                break;
                        leaf = path->nodes[0];
@@ -2209,7 +2062,7 @@ next_slot:
                if (found_key.offset > cur_offset) {
                        extent_end = found_key.offset;
                        extent_type = 0;
-                       goto out_check;
+                       goto must_cow;
                }
 
                /*
@@ -2239,24 +2092,22 @@ next_slot:
 
                nocow_args.start = cur_offset;
                ret = can_nocow_file_extent(path, &found_key, inode, &nocow_args);
-               if (ret < 0) {
-                       if (cow_start != (u64)-1)
-                               cur_offset = cow_start;
+               if (ret < 0)
                        goto error;
-               } else if (ret == 0) {
-                       goto out_check;
-               }
+               if (ret == 0)
+                       goto must_cow;
 
                ret = 0;
-               bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
-               if (bg)
-                       nocow = true;
-out_check:
-               /*
-                * If nocow is false then record the beginning of the range
-                * that needs to be COWed
-                */
-               if (!nocow) {
+               nocow_bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
+               if (!nocow_bg) {
+must_cow:
+                       /*
+                        * If we can't perform NOCOW writeback for the range,
+                        * then record the beginning of the range that needs to
+                        * be COWed.  It will be written out before the next
+                        * NOCOW range if we find one, or when exiting this
+                        * loop.
+                        */
                        if (cow_start == (u64)-1)
                                cow_start = cur_offset;
                        cur_offset = extent_end;
@@ -2275,11 +2126,12 @@ out_check:
                 */
                if (cow_start != (u64)-1) {
                        ret = fallback_to_cow(inode, locked_page,
-                                             cow_start, found_key.offset - 1,
-                                             page_started, nr_written);
-                       if (ret)
-                               goto error;
+                                             cow_start, found_key.offset - 1);
                        cow_start = (u64)-1;
+                       if (ret) {
+                               btrfs_dec_nocow_writers(nocow_bg);
+                               goto error;
+                       }
                }
 
                nocow_end = cur_offset + nocow_args.num_bytes - 1;
@@ -2296,6 +2148,7 @@ out_check:
                                          ram_bytes, BTRFS_COMPRESS_NONE,
                                          BTRFS_ORDERED_PREALLOC);
                        if (IS_ERR(em)) {
+                               btrfs_dec_nocow_writers(nocow_bg);
                                ret = PTR_ERR(em);
                                goto error;
                        }
@@ -2309,6 +2162,7 @@ out_check:
                                ? (1 << BTRFS_ORDERED_PREALLOC)
                                : (1 << BTRFS_ORDERED_NOCOW),
                                BTRFS_COMPRESS_NONE);
+               btrfs_dec_nocow_writers(nocow_bg);
                if (IS_ERR(ordered)) {
                        if (is_prealloc) {
                                btrfs_drop_extent_map_range(inode, cur_offset,
@@ -2318,11 +2172,6 @@ out_check:
                        goto error;
                }
 
-               if (nocow) {
-                       btrfs_dec_nocow_writers(bg);
-                       nocow = false;
-               }
-
                if (btrfs_is_data_reloc_root(root))
                        /*
                         * Error handled later, as we must prevent
@@ -2357,17 +2206,24 @@ out_check:
 
        if (cow_start != (u64)-1) {
                cur_offset = end;
-               ret = fallback_to_cow(inode, locked_page, cow_start, end,
-                                     page_started, nr_written);
+               ret = fallback_to_cow(inode, locked_page, cow_start, end);
+               cow_start = (u64)-1;
                if (ret)
                        goto error;
        }
 
-error:
-       if (nocow)
-               btrfs_dec_nocow_writers(bg);
+       btrfs_free_path(path);
+       return 0;
 
-       if (ret && cur_offset < end)
+error:
+       /*
+        * If an error happened while a COW region is outstanding, cur_offset
+        * needs to be reset to cow_start to ensure the COW region is unlocked
+        * as well.
+        */
+       if (cow_start != (u64)-1)
+               cur_offset = cow_start;
+       if (cur_offset < end)
                extent_clear_unlock_delalloc(inode, cur_offset, end,
                                             locked_page, EXTENT_LOCKED |
                                             EXTENT_DELALLOC | EXTENT_DEFRAG |
@@ -2395,49 +2251,37 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
  * being touched for the first time.
  */
 int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
-               u64 start, u64 end, int *page_started, unsigned long *nr_written,
-               struct writeback_control *wbc)
+                            u64 start, u64 end, struct writeback_control *wbc)
 {
-       int ret = 0;
        const bool zoned = btrfs_is_zoned(inode->root->fs_info);
+       int ret;
 
        /*
-        * The range must cover part of the @locked_page, or the returned
-        * @page_started can confuse the caller.
+        * The range must cover part of the @locked_page, or a return of 1
+        * can confuse the caller.
         */
        ASSERT(!(end <= page_offset(locked_page) ||
                 start >= page_offset(locked_page) + PAGE_SIZE));
 
        if (should_nocow(inode, start, end)) {
-               /*
-                * Normally on a zoned device we're only doing COW writes, but
-                * in case of relocation on a zoned filesystem we have taken
-                * precaution, that we're only writing sequentially. It's safe
-                * to use run_delalloc_nocow() here, like for  regular
-                * preallocated inodes.
-                */
-               ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
-               ret = run_delalloc_nocow(inode, locked_page, start, end,
-                                        page_started, nr_written);
+               ret = run_delalloc_nocow(inode, locked_page, start, end);
                goto out;
        }
 
        if (btrfs_inode_can_compress(inode) &&
            inode_need_compress(inode, start, end) &&
-           run_delalloc_compressed(inode, wbc, locked_page, start,
-                                   end, page_started, nr_written))
-               goto out;
+           run_delalloc_compressed(inode, locked_page, start, end, wbc))
+               return 1;
 
        if (zoned)
-               ret = run_delalloc_zoned(inode, locked_page, start, end,
-                                        page_started, nr_written, wbc);
+               ret = run_delalloc_cow(inode, locked_page, start, end, wbc,
+                                      true);
        else
-               ret = cow_file_range(inode, locked_page, start, end,
-                                    page_started, nr_written, 1, NULL);
+               ret = cow_file_range(inode, locked_page, start, end, NULL,
+                                    false, false);
 
 out:
-       ASSERT(ret <= 0);
-       if (ret)
+       if (ret < 0)
                btrfs_cleanup_ordered_extents(inode, locked_page, start,
                                              end - start + 1);
        return ret;
@@ -2840,23 +2684,19 @@ struct btrfs_writepage_fixup {
 
 static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
 {
-       struct btrfs_writepage_fixup *fixup;
+       struct btrfs_writepage_fixup *fixup =
+               container_of(work, struct btrfs_writepage_fixup, work);
        struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
        struct extent_changeset *data_reserved = NULL;
-       struct page *page;
-       struct btrfs_inode *inode;
-       u64 page_start;
-       u64 page_end;
+       struct page *page = fixup->page;
+       struct btrfs_inode *inode = fixup->inode;
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       u64 page_start = page_offset(page);
+       u64 page_end = page_offset(page) + PAGE_SIZE - 1;
        int ret = 0;
        bool free_delalloc_space = true;
 
-       fixup = container_of(work, struct btrfs_writepage_fixup, work);
-       page = fixup->page;
-       inode = fixup->inode;
-       page_start = page_offset(page);
-       page_end = page_offset(page) + PAGE_SIZE - 1;
-
        /*
         * This is similar to page_mkwrite, we need to reserve the space before
         * we take the page lock.
@@ -2949,10 +2789,12 @@ out_page:
                 * to reflect the errors and clean the page.
                 */
                mapping_set_error(page->mapping, ret);
-               end_extent_writepage(page, ret, page_start, page_end);
+               btrfs_mark_ordered_io_finished(inode, page, page_start,
+                                              PAGE_SIZE, !ret);
+               btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE);
                clear_page_dirty_for_io(page);
        }
-       btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
+       btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
        unlock_page(page);
        put_page(page);
        kfree(fixup);
@@ -3359,6 +3201,13 @@ out:
                        btrfs_free_reserved_extent(fs_info,
                                        ordered_extent->disk_bytenr,
                                        ordered_extent->disk_num_bytes, 1);
+                       /*
+                        * Actually free the qgroup rsv which was released when
+                        * the ordered extent was created.
+                        */
+                       btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
+                                                 ordered_extent->qgroup_rsv,
+                                                 BTRFS_QGROUP_RSV_DATA);
                }
        }
 
@@ -3384,15 +3233,6 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
        return btrfs_finish_one_ordered(ordered);
 }
 
-void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
-                                         struct page *page, u64 start,
-                                         u64 end, bool uptodate)
-{
-       trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
-
-       btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start, uptodate);
-}
-
 /*
  * Verify the checksum for a single sector without any extra action that depend
  * on the type of I/O.
@@ -3482,15 +3322,21 @@ zeroit:
 void btrfs_add_delayed_iput(struct btrfs_inode *inode)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
+       unsigned long flags;
 
        if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
                return;
 
        atomic_inc(&fs_info->nr_delayed_iputs);
-       spin_lock(&fs_info->delayed_iput_lock);
+       /*
+        * Need to be irq safe here because we can be called from either an irq
+        * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
+        * context.
+        */
+       spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
        ASSERT(list_empty(&inode->delayed_iput));
        list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
-       spin_unlock(&fs_info->delayed_iput_lock);
+       spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
        if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
                wake_up_process(fs_info->cleaner_kthread);
 }
@@ -3499,37 +3345,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
                                    struct btrfs_inode *inode)
 {
        list_del_init(&inode->delayed_iput);
-       spin_unlock(&fs_info->delayed_iput_lock);
+       spin_unlock_irq(&fs_info->delayed_iput_lock);
        iput(&inode->vfs_inode);
        if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
                wake_up(&fs_info->delayed_iputs_wait);
-       spin_lock(&fs_info->delayed_iput_lock);
+       spin_lock_irq(&fs_info->delayed_iput_lock);
 }
 
 static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
                                   struct btrfs_inode *inode)
 {
        if (!list_empty(&inode->delayed_iput)) {
-               spin_lock(&fs_info->delayed_iput_lock);
+               spin_lock_irq(&fs_info->delayed_iput_lock);
                if (!list_empty(&inode->delayed_iput))
                        run_delayed_iput_locked(fs_info, inode);
-               spin_unlock(&fs_info->delayed_iput_lock);
+               spin_unlock_irq(&fs_info->delayed_iput_lock);
        }
 }
 
 void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
 {
-
-       spin_lock(&fs_info->delayed_iput_lock);
+       /*
+        * btrfs_put_ordered_extent() can run in irq context (see bio.c), which
+        * calls btrfs_add_delayed_iput() and that needs to lock
+        * fs_info->delayed_iput_lock. So we need to disable irqs here to
+        * prevent a deadlock.
+        */
+       spin_lock_irq(&fs_info->delayed_iput_lock);
        while (!list_empty(&fs_info->delayed_iputs)) {
                struct btrfs_inode *inode;
 
                inode = list_first_entry(&fs_info->delayed_iputs,
                                struct btrfs_inode, delayed_iput);
                run_delayed_iput_locked(fs_info, inode);
-               cond_resched_lock(&fs_info->delayed_iput_lock);
+               if (need_resched()) {
+                       spin_unlock_irq(&fs_info->delayed_iput_lock);
+                       cond_resched();
+                       spin_lock_irq(&fs_info->delayed_iput_lock);
+               }
        }
-       spin_unlock(&fs_info->delayed_iput_lock);
+       spin_unlock_irq(&fs_info->delayed_iput_lock);
 }
 
 /*
@@ -3647,9 +3502,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 */
 
                if (found_key.offset == last_objectid) {
+                       /*
+                        * We found the same inode as before. This means we were
+                        * not able to remove its items via eviction triggered
+                        * by an iput(). A transaction abort may have happened,
+                        * due to -ENOSPC for example, so try to grab the error
+                        * that led to a transaction abort, if any.
+                        */
                        btrfs_err(fs_info,
                                  "Error removing orphan entry, stopping orphan cleanup");
-                       ret = -EINVAL;
+                       ret = BTRFS_FS_ERROR(fs_info) ?: -EINVAL;
                        goto out;
                }
 
@@ -3659,11 +3521,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                found_key.type = BTRFS_INODE_ITEM_KEY;
                found_key.offset = 0;
                inode = btrfs_iget(fs_info->sb, last_objectid, root);
-               ret = PTR_ERR_OR_ZERO(inode);
-               if (ret && ret != -ENOENT)
-                       goto out;
+               if (IS_ERR(inode)) {
+                       ret = PTR_ERR(inode);
+                       inode = NULL;
+                       if (ret != -ENOENT)
+                               goto out;
+               }
 
-               if (ret == -ENOENT && root == fs_info->tree_root) {
+               if (!inode && root == fs_info->tree_root) {
                        struct btrfs_root *dead_root;
                        int is_dead_root = 0;
 
@@ -3724,17 +3589,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * deleted but wasn't. The inode number may have been reused,
                 * but either way, we can delete the orphan item.
                 */
-               if (ret == -ENOENT || inode->i_nlink) {
-                       if (!ret) {
+               if (!inode || inode->i_nlink) {
+                       if (inode) {
                                ret = btrfs_drop_verity_items(BTRFS_I(inode));
                                iput(inode);
+                               inode = NULL;
                                if (ret)
                                        goto out;
                        }
                        trans = btrfs_start_transaction(root, 1);
                        if (IS_ERR(trans)) {
                                ret = PTR_ERR(trans);
-                               iput(inode);
                                goto out;
                        }
                        btrfs_debug(fs_info, "auto deleting %Lu",
@@ -3742,10 +3607,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                        ret = btrfs_del_orphan_item(trans, root,
                                                    found_key.objectid);
                        btrfs_end_transaction(trans);
-                       if (ret) {
-                               iput(inode);
+                       if (ret)
                                goto out;
-                       }
                        continue;
                }
 
@@ -3901,8 +3764,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
        inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
        inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
 
-       inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
-       inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
+       inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime),
+                       btrfs_timespec_nsec(leaf, &inode_item->ctime));
 
        BTRFS_I(inode)->i_otime.tv_sec =
                btrfs_timespec_sec(leaf, &inode_item->otime);
@@ -4073,9 +3936,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                                      inode->i_mtime.tv_nsec);
 
        btrfs_set_token_timespec_sec(&token, &item->ctime,
-                                    inode->i_ctime.tv_sec);
+                                    inode_get_ctime(inode).tv_sec);
        btrfs_set_token_timespec_nsec(&token, &item->ctime,
-                                     inode->i_ctime.tv_nsec);
+                                     inode_get_ctime(inode).tv_nsec);
 
        btrfs_set_token_timespec_sec(&token, &item->otime,
                                     BTRFS_I(inode)->i_otime.tv_sec);
@@ -4273,9 +4136,8 @@ err:
        btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2);
        inode_inc_iversion(&inode->vfs_inode);
        inode_inc_iversion(&dir->vfs_inode);
-       inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
-       dir->vfs_inode.i_mtime = inode->vfs_inode.i_ctime;
-       dir->vfs_inode.i_ctime = inode->vfs_inode.i_ctime;
+       inode_set_ctime_current(&inode->vfs_inode);
+       dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode);
        ret = btrfs_update_inode(trans, root, dir);
 out:
        return ret;
@@ -4448,8 +4310,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 
        btrfs_i_size_write(dir, dir->vfs_inode.i_size - fname.disk_name.len * 2);
        inode_inc_iversion(&dir->vfs_inode);
-       dir->vfs_inode.i_mtime = current_time(&dir->vfs_inode);
-       dir->vfs_inode.i_ctime = dir->vfs_inode.i_mtime;
+       dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode);
        ret = btrfs_update_inode_fallback(trans, root, dir);
        if (ret)
                btrfs_abort_transaction(trans, ret);
@@ -4847,9 +4708,6 @@ again:
                ret = -ENOMEM;
                goto out;
        }
-       ret = set_page_extent_mapped(page);
-       if (ret < 0)
-               goto out_unlock;
 
        if (!PageUptodate(page)) {
                ret = btrfs_read_folio(NULL, page_folio(page));
@@ -4864,6 +4722,17 @@ again:
                        goto out_unlock;
                }
        }
+
+       /*
+        * We unlock the page after the io is completed and then re-lock it
+        * above.  release_folio() could have come in between that and cleared
+        * PagePrivate(), but left the page in the mapping.  Set the page mapped
+        * here to make sure it's properly set up for the subpage support.
+        */
+       ret = set_page_extent_mapped(page);
+       if (ret < 0)
+               goto out_unlock;
+
        wait_on_page_writeback(page);
 
        lock_extent(io_tree, block_start, block_end, &cached_state);
@@ -5091,8 +4960,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
        if (newsize != oldsize) {
                inode_inc_iversion(inode);
                if (!(mask & (ATTR_CTIME | ATTR_MTIME))) {
-                       inode->i_mtime = current_time(inode);
-                       inode->i_ctime = inode->i_mtime;
+                       inode->i_mtime = inode_set_ctime_current(inode);
                }
        }
 
@@ -5714,11 +5582,11 @@ struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root
        return btrfs_iget_path(s, ino, root, NULL);
 }
 
-static struct inode *new_simple_dir(struct super_block *s,
+static struct inode *new_simple_dir(struct inode *dir,
                                    struct btrfs_key *key,
                                    struct btrfs_root *root)
 {
-       struct inode *inode = new_inode(s);
+       struct inode *inode = new_inode(dir->i_sb);
 
        if (!inode)
                return ERR_PTR(-ENOMEM);
@@ -5736,10 +5604,11 @@ static struct inode *new_simple_dir(struct super_block *s,
        inode->i_opflags &= ~IOP_XATTR;
        inode->i_fop = &simple_dir_operations;
        inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
-       inode->i_mtime = current_time(inode);
-       inode->i_atime = inode->i_mtime;
-       inode->i_ctime = inode->i_mtime;
+       inode->i_mtime = inode_set_ctime_current(inode);
+       inode->i_atime = dir->i_atime;
        BTRFS_I(inode)->i_otime = inode->i_mtime;
+       inode->i_uid = dir->i_uid;
+       inode->i_gid = dir->i_gid;
 
        return inode;
 }
@@ -5798,7 +5667,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
                if (ret != -ENOENT)
                        inode = ERR_PTR(ret);
                else
-                       inode = new_simple_dir(dir->i_sb, &location, root);
+                       inode = new_simple_dir(dir, &location, root);
        } else {
                inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
                btrfs_put_root(sub_root);
@@ -5849,6 +5718,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 }
 
 /*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+       struct btrfs_root *root = inode->root;
+       struct btrfs_key key, found_key;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       int ret;
+
+       key.objectid = btrfs_ino(inode);
+       key.type = BTRFS_DIR_INDEX_KEY;
+       key.offset = (u64)-1;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out;
+       /* FIXME: we should be able to handle this */
+       if (ret == 0)
+               goto out;
+       ret = 0;
+
+       if (path->slots[0] == 0) {
+               inode->index_cnt = BTRFS_DIR_START_INDEX;
+               goto out;
+       }
+
+       path->slots[0]--;
+
+       leaf = path->nodes[0];
+       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+       if (found_key.objectid != btrfs_ino(inode) ||
+           found_key.type != BTRFS_DIR_INDEX_KEY) {
+               inode->index_cnt = BTRFS_DIR_START_INDEX;
+               goto out;
+       }
+
+       inode->index_cnt = found_key.offset + 1;
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+       if (dir->index_cnt == (u64)-1) {
+               int ret;
+
+               ret = btrfs_inode_delayed_dir_index_count(dir);
+               if (ret) {
+                       ret = btrfs_set_inode_index_count(dir);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       *index = dir->index_cnt;
+
+       return 0;
+}
+
+/*
  * All this infrastructure exists because dir_emit can fault, and we are holding
  * the tree lock when doing readdir.  For now just allocate a buffer and copy
  * our information into that, and then dir_emit from the buffer.  This is
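
btrfs_get_dir_last_index() above lazily fills index_cnt on first use: find the highest existing directory-index key and cache highest + 1. The same logic reduced to an array standing in for the btree (hypothetical names):

#include <stdio.h>

#define INDEX_UNSET	((unsigned long)-1)
#define START_INDEX	2	/* mirrors BTRFS_DIR_START_INDEX */

struct dir {
	unsigned long index_cnt;	/* next free index, lazily computed */
	unsigned long keys[4];		/* sorted "directory index" keys */
	int nr_keys;
};

static int get_last_index(struct dir *d, unsigned long *index)
{
	if (d->index_cnt == INDEX_UNSET) {
		/* "search" for the highest existing key */
		if (d->nr_keys == 0)
			d->index_cnt = START_INDEX;
		else
			d->index_cnt = d->keys[d->nr_keys - 1] + 1;
	}
	*index = d->index_cnt;
	return 0;
}

int main(void)
{
	struct dir d = { INDEX_UNSET, { 2, 3, 7 }, 3 };
	unsigned long idx;

	get_last_index(&d, &idx);
	printf("next free index: %lu\n", idx);	/* prints 8 */
	return 0;
}
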
@@ -5860,10 +5797,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
 static int btrfs_opendir(struct inode *inode, struct file *file)
 {
        struct btrfs_file_private *private;
+       u64 last_index;
+       int ret;
+
+       ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+       if (ret)
+               return ret;
 
        private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
        if (!private)
                return -ENOMEM;
+       private->last_index = last_index;
        private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
        if (!private->filldir_buf) {
                kfree(private);
@@ -5908,8 +5852,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        struct btrfs_key found_key;
        struct btrfs_path *path;
        void *addr;
-       struct list_head ins_list;
-       struct list_head del_list;
+       LIST_HEAD(ins_list);
+       LIST_HEAD(del_list);
        int ret;
        char *name_ptr;
        int name_len;
@@ -5928,9 +5872,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
        addr = private->filldir_buf;
        path->reada = READA_FORWARD;
 
-       INIT_LIST_HEAD(&ins_list);
-       INIT_LIST_HEAD(&del_list);
-       put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+       put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+                                             &ins_list, &del_list);
 
 again:
        key.type = BTRFS_DIR_INDEX_KEY;
@@ -5948,6 +5891,8 @@ again:
                        break;
                if (found_key.offset < ctx->pos)
                        continue;
+               if (found_key.offset > private->last_index)
+                       break;
                if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
                        continue;
                di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
@@ -6063,8 +6008,7 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode)
  * This is a copy of file_update_time.  We need this so we can return error on
  * ENOSPC for updating the inode in the case of file write and mmap writes.
  */
-static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
-                            int flags)
+static int btrfs_update_time(struct inode *inode, int flags)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        bool dirty = flags & ~S_VERSION;
@@ -6072,69 +6016,11 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
        if (btrfs_root_readonly(root))
                return -EROFS;
 
-       if (flags & S_VERSION)
-               dirty |= inode_maybe_inc_iversion(inode, dirty);
-       if (flags & S_CTIME)
-               inode->i_ctime = *now;
-       if (flags & S_MTIME)
-               inode->i_mtime = *now;
-       if (flags & S_ATIME)
-               inode->i_atime = *now;
+       dirty = inode_update_timestamps(inode, flags);
        return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0;
 }
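
After this change btrfs_update_time() leaves the flag decoding to inode_update_timestamps() and keeps only the read-only check and the "do we need to dirty the inode" decision. A sketch of that shape with hand-rolled flags (not the real VFS API):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define T_VERSION	(1 << 0)
#define T_CTIME		(1 << 1)
#define T_MTIME		(1 << 2)
#define T_ATIME		(1 << 3)

struct toy_inode {
	time_t atime, mtime, ctime;
	bool readonly;
};

/* Stand-in for inode_update_timestamps(): applies the requested stamps
 * and reports whether anything that must hit disk changed. */
static int update_timestamps(struct toy_inode *inode, int flags)
{
	time_t now = time(NULL);

	if (flags & T_CTIME)
		inode->ctime = now;
	if (flags & T_MTIME)
		inode->mtime = now;
	if (flags & T_ATIME)
		inode->atime = now;
	return flags & ~T_VERSION;	/* a version bump alone isn't dirty */
}

static int toy_update_time(struct toy_inode *inode, int flags)
{
	if (inode->readonly)
		return -1;	/* mirrors the -EROFS check above */
	return update_timestamps(inode, flags) ? 1 /* dirty */ : 0;
}

int main(void)
{
	struct toy_inode ino = { 0 };

	printf("dirty=%d\n", toy_update_time(&ino, T_MTIME | T_VERSION));
	return 0;
}
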
 
 /*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
-       struct btrfs_root *root = inode->root;
-       struct btrfs_key key, found_key;
-       struct btrfs_path *path;
-       struct extent_buffer *leaf;
-       int ret;
-
-       key.objectid = btrfs_ino(inode);
-       key.type = BTRFS_DIR_INDEX_KEY;
-       key.offset = (u64)-1;
-
-       path = btrfs_alloc_path();
-       if (!path)
-               return -ENOMEM;
-
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0)
-               goto out;
-       /* FIXME: we should be able to handle this */
-       if (ret == 0)
-               goto out;
-       ret = 0;
-
-       if (path->slots[0] == 0) {
-               inode->index_cnt = BTRFS_DIR_START_INDEX;
-               goto out;
-       }
-
-       path->slots[0]--;
-
-       leaf = path->nodes[0];
-       btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
-       if (found_key.objectid != btrfs_ino(inode) ||
-           found_key.type != BTRFS_DIR_INDEX_KEY) {
-               inode->index_cnt = BTRFS_DIR_START_INDEX;
-               goto out;
-       }
-
-       inode->index_cnt = found_key.offset + 1;
-out:
-       btrfs_free_path(path);
-       return ret;
-}
-
-/*
  * helper to find a free sequence number in a given directory.  This current
  * code is very simple, later versions will do smarter things in the btree
  */
@@ -6378,9 +6264,8 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
                goto discard;
        }
 
-       inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_atime = inode->i_mtime;
-       inode->i_ctime = inode->i_mtime;
        BTRFS_I(inode)->i_otime = inode->i_mtime;
 
        /*
@@ -6545,12 +6430,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
         * log replay procedure is responsible for setting them to their correct
         * values (the ones it had when the fsync was done).
         */
-       if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
-               struct timespec64 now = current_time(&parent_inode->vfs_inode);
+       if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags))
+               parent_inode->vfs_inode.i_mtime =
+                       inode_set_ctime_current(&parent_inode->vfs_inode);
 
-               parent_inode->vfs_inode.i_mtime = now;
-               parent_inode->vfs_inode.i_ctime = now;
-       }
        ret = btrfs_update_inode(trans, root, parent_inode);
        if (ret)
                btrfs_abort_transaction(trans, ret);
@@ -6690,7 +6573,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
        BTRFS_I(inode)->dir_index = 0ULL;
        inc_nlink(inode);
        inode_inc_iversion(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        ihold(inode);
        set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
 
@@ -7849,8 +7732,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
 
                ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
                if (ret) {
-                       bbio->bio.bi_status = errno_to_blk_status(ret);
-                       btrfs_dio_end_io(bbio);
+                       btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+                                                   file_offset, dip->bytes,
+                                                   !ret);
+                       bio->bi_status = errno_to_blk_status(ret);
+                       iomap_dio_bio_end_io(bio);
                        return;
                }
        }
@@ -8753,7 +8639,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
                                  STATX_ATTR_IMMUTABLE |
                                  STATX_ATTR_NODUMP);
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
        stat->dev = BTRFS_I(inode)->root->anon_dev;
 
        spin_lock(&BTRFS_I(inode)->lock);
@@ -8777,7 +8663,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        struct btrfs_root *dest = BTRFS_I(new_dir)->root;
        struct inode *new_inode = new_dentry->d_inode;
        struct inode *old_inode = old_dentry->d_inode;
-       struct timespec64 ctime = current_time(old_inode);
        struct btrfs_rename_ctx old_rename_ctx;
        struct btrfs_rename_ctx new_rename_ctx;
        u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
@@ -8908,12 +8793,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
        inode_inc_iversion(new_dir);
        inode_inc_iversion(old_inode);
        inode_inc_iversion(new_inode);
-       old_dir->i_mtime = ctime;
-       old_dir->i_ctime = ctime;
-       new_dir->i_mtime = ctime;
-       new_dir->i_ctime = ctime;
-       old_inode->i_ctime = ctime;
-       new_inode->i_ctime = ctime;
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
        if (old_dentry->d_parent != new_dentry->d_parent) {
                btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
@@ -9177,11 +9057,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
        inode_inc_iversion(old_dir);
        inode_inc_iversion(new_dir);
        inode_inc_iversion(old_inode);
-       old_dir->i_mtime = current_time(old_dir);
-       old_dir->i_ctime = old_dir->i_mtime;
-       new_dir->i_mtime = old_dir->i_mtime;
-       new_dir->i_ctime = old_dir->i_mtime;
-       old_inode->i_ctime = old_dir->i_mtime;
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
        if (old_dentry->d_parent != new_dentry->d_parent)
                btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
@@ -9203,7 +9079,6 @@ static int btrfs_rename(struct mnt_idmap *idmap,
 
        if (new_inode) {
                inode_inc_iversion(new_inode);
-               new_inode->i_ctime = current_time(new_inode);
                if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
                             BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
                        ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
@@ -9336,14 +9211,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
        struct btrfs_inode *binode;
        struct inode *inode;
        struct btrfs_delalloc_work *work, *next;
-       struct list_head works;
-       struct list_head splice;
+       LIST_HEAD(works);
+       LIST_HEAD(splice);
        int ret = 0;
        bool full_flush = wbc->nr_to_write == LONG_MAX;
 
-       INIT_LIST_HEAD(&works);
-       INIT_LIST_HEAD(&splice);
-
        mutex_lock(&root->delalloc_mutex);
        spin_lock(&root->delalloc_lock);
        list_splice_init(&root->delalloc_inodes, &splice);
@@ -9431,14 +9303,12 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
                .range_end = LLONG_MAX,
        };
        struct btrfs_root *root;
-       struct list_head splice;
+       LIST_HEAD(splice);
        int ret;
 
        if (BTRFS_FS_ERROR(fs_info))
                return -EROFS;
 
-       INIT_LIST_HEAD(&splice);
-
        mutex_lock(&fs_info->delalloc_root_mutex);
        spin_lock(&fs_info->delalloc_root_lock);
        list_splice_init(&fs_info->delalloc_roots, &splice);
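
Both hunks above replace INIT_LIST_HEAD() calls with LIST_HEAD() declarations, which define and initialize the on-stack list head in one step; the macro is just a self-referencing pair of pointers. A freestanding copy for illustration:

#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

/* Same shape as the kernel macro: declare + initialize in one line. */
#define LIST_HEAD_INIT(name)	{ &(name), &(name) }
#define LIST_HEAD(name)		struct list_head name = LIST_HEAD_INIT(name)

int main(void)
{
	LIST_HEAD(splice);	/* replaces INIT_LIST_HEAD(&splice) */

	printf("empty: %d\n", splice.next == &splice && splice.prev == &splice);
	return 0;
}
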
@@ -9743,7 +9613,7 @@ next:
                *alloc_hint = ins.objectid + ins.offset;
 
                inode_inc_iversion(inode);
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
                if (!(mode & FALLOC_FL_KEEP_SIZE) &&
                    (actual_len > inode->i_size) &&
index a895d10..a18ee7b 100644 (file)
@@ -384,7 +384,7 @@ update_flags:
        binode->flags = binode_flags;
        btrfs_sync_inode_flags_to_i_flags(inode);
        inode_inc_iversion(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
 
  out_end_trans:
index 23fc11a..7695dec 100644 (file)
 #ifdef CONFIG_PRINTK
 
 #define STATE_STRING_PREFACE   ": state "
-#define STATE_STRING_BUF_LEN   (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+#define STATE_STRING_BUF_LEN   (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)
 
 /*
  * Characters to print to indicate error conditions or uncommon filesystem state.
  * RO is not an error.
  */
 static const char fs_state_chars[] = {
-       [BTRFS_FS_STATE_ERROR]                  = 'E',
        [BTRFS_FS_STATE_REMOUNTING]             = 'M',
        [BTRFS_FS_STATE_RO]                     = 0,
        [BTRFS_FS_STATE_TRANS_ABORTED]          = 'A',
@@ -37,6 +36,11 @@ static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
        memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
        curr += sizeof(STATE_STRING_PREFACE) - 1;
 
+       if (BTRFS_FS_ERROR(info)) {
+               *curr++ = 'E';
+               states_printed = true;
+       }
+
        for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
                WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
                if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
@@ -155,7 +159,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
         * Today we only save the error info to memory.  Long term we'll also
         * send it down to the disk.
         */
-       set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+       WRITE_ONCE(fs_info->fs_error, errno);
 
        /* Don't go through full error handling during mount. */
        if (!(sb->s_flags & SB_BORN))
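
With this hunk the error indicator moves out of the fs_state bitmap into a dedicated fs_error field, so the first errno is preserved rather than a single bit, and the 'E' state character is derived from it in btrfs_state_to_string() above. A sketch of the reader side, assuming BTRFS_FS_ERROR() is the READ_ONCE() wrapper in fs/btrfs/fs.h (example_check_error() is hypothetical):

    /* Assumed definition, per fs/btrfs/fs.h in this cycle:
     *   #define BTRFS_FS_ERROR(fs_info)  (READ_ONCE((fs_info)->fs_error))
     */
    static int example_check_error(struct btrfs_fs_info *fs_info)
    {
            int err = BTRFS_FS_ERROR(fs_info); /* 0 or a negative errno */

            if (err)
                    return err; /* propagate the first recorded error */
            return 0;
    }
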
@@ -252,12 +256,6 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
 }
 #endif
 
-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
-{
-       btrfs_err(fs_info,
-"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
-}
-
 #if BITS_PER_LONG == 32
 void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
 {
index deedc1a..1ae6f8e 100644 (file)
@@ -181,8 +181,6 @@ do {                                                                \
 #define ASSERT(expr)   (void)(expr)
 #endif
 
-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info);
-
 __printf(5, 6)
 __cold
 void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
index a629532..b46ab34 100644 (file)
@@ -410,6 +410,10 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
        unsigned long flags;
        u64 cur = file_offset;
 
+       trace_btrfs_writepage_end_io_hook(inode, file_offset,
+                                         file_offset + num_bytes - 1,
+                                         uptodate);
+
        spin_lock_irqsave(&tree->lock, flags);
        while (cur < file_offset + num_bytes) {
                u64 entry_end;
@@ -736,11 +740,9 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
                             const u64 range_start, const u64 range_len)
 {
        struct btrfs_root *root;
-       struct list_head splice;
+       LIST_HEAD(splice);
        u64 done;
 
-       INIT_LIST_HEAD(&splice);
-
        mutex_lock(&fs_info->ordered_operations_mutex);
        spin_lock(&fs_info->ordered_root_lock);
        list_splice_init(&fs_info->ordered_roots, &splice);
index aa06d9c..0c93439 100644 (file)
@@ -95,8 +95,10 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
        int ref_index = 0;
 
        if (unlikely(item_size < sizeof(*ei))) {
-               btrfs_print_v0_err(eb->fs_info);
-               btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+               btrfs_err(eb->fs_info,
+                         "unexpected extent item size, has %u expect >= %zu",
+                         item_size, sizeof(*ei));
+               btrfs_handle_fs_error(eb->fs_info, -EUCLEAN, NULL);
        }
 
        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -291,10 +293,6 @@ void btrfs_print_leaf(const struct extent_buffer *l)
                               btrfs_file_extent_num_bytes(l, fi),
                               btrfs_file_extent_ram_bytes(l, fi));
                        break;
-               case BTRFS_EXTENT_REF_V0_KEY:
-                       btrfs_print_v0_err(fs_info);
-                       btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
-                       break;
                case BTRFS_BLOCK_GROUP_ITEM_KEY:
                        bi = btrfs_item_ptr(l, i,
                                            struct btrfs_block_group_item);
index da1f84a..b99230d 100644 (file)
@@ -3590,15 +3590,16 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
         * going to clear all tracking information for a clean start.
         */
 
-       trans = btrfs_join_transaction(fs_info->fs_root);
-       if (IS_ERR(trans)) {
+       trans = btrfs_attach_transaction_barrier(fs_info->fs_root);
+       if (IS_ERR(trans) && trans != ERR_PTR(-ENOENT)) {
                fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
                return PTR_ERR(trans);
-       }
-       ret = btrfs_commit_transaction(trans);
-       if (ret) {
-               fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
-               return ret;
+       } else if (trans != ERR_PTR(-ENOENT)) {
+               ret = btrfs_commit_transaction(trans);
+               if (ret) {
+                       fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+                       return ret;
+               }
        }
 
        qgroup_rescan_zero_tracking(fs_info);
@@ -3757,9 +3758,11 @@ static int try_flush_qgroup(struct btrfs_root *root)
                goto out;
        btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
 
-       trans = btrfs_join_transaction(root);
+       trans = btrfs_attach_transaction_barrier(root);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
+               if (ret == -ENOENT)
+                       ret = 0;
                goto out;
        }
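
btrfs_attach_transaction_barrier() differs from btrfs_join_transaction() in that it never starts a transaction: it attaches to a running one (waiting out an in-progress commit) and returns ERR_PTR(-ENOENT) when none exists. Callers that only want to flush existing state therefore treat -ENOENT as success, as both hunks above do. A hedged sketch of the calling convention (example_commit_if_running() is hypothetical):

    static int example_commit_if_running(struct btrfs_root *root)
    {
            struct btrfs_trans_handle *trans;

            trans = btrfs_attach_transaction_barrier(root);
            if (IS_ERR(trans)) {
                    int ret = PTR_ERR(trans);

                    /* No running transaction: nothing to commit. */
                    return ret == -ENOENT ? 0 : ret;
            }
            return btrfs_commit_transaction(trans);
    }
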
 
@@ -4445,4 +4448,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
                ulist_free(entry->old_roots);
                kfree(entry);
        }
+       *root = RB_ROOT;
 }
index f37b925..3e014b9 100644 (file)
@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work);
 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
 static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
 
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
 static void scrub_rbio_work_locked(struct work_struct *work);
 
 static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
@@ -584,8 +584,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
        if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
                return 0;
 
-       if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
-           last->operation == BTRFS_RBIO_READ_REBUILD)
+       if (last->operation == BTRFS_RBIO_READ_REBUILD)
                return 0;
 
        return 1;
@@ -784,10 +783,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
                        spin_unlock(&rbio->bio_list_lock);
                        spin_unlock(&h->lock);
 
-                       if (next->operation == BTRFS_RBIO_READ_REBUILD)
-                               start_async_work(next, recover_rbio_work_locked);
-                       else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
-                               steal_rbio(rbio, next);
+                       if (next->operation == BTRFS_RBIO_READ_REBUILD) {
                                start_async_work(next, recover_rbio_work_locked);
                        } else if (next->operation == BTRFS_RBIO_WRITE) {
                                steal_rbio(rbio, next);
@@ -1517,11 +1513,11 @@ static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
        while ((bio = bio_list_pop(bio_list))) {
                bio->bi_end_io = raid_wait_read_end_io;
 
-               if (trace_raid56_scrub_read_recover_enabled()) {
+               if (trace_raid56_read_enabled()) {
                        struct raid56_bio_trace_info trace_info = { 0 };
 
                        bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
+                       trace_raid56_read(rbio, bio, &trace_info);
                }
                submit_bio(bio);
        }
@@ -1698,8 +1694,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
         * If we're rebuilding a read, we have to use pages from the
         * bio list if possible.
         */
-       if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-            rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
+       if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
                sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
        } else {
                sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1763,8 +1758,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
                 * If we're rebuilding a read, we have to use pages from the
                 * bio list if possible.
                 */
-               if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-                    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
+               if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
                        sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
                } else {
                        sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1897,8 +1891,7 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
                goto out;
        }
 
-       if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-           rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+       if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
                spin_lock(&rbio->bio_list_lock);
                set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
                spin_unlock(&rbio->bio_list_lock);
@@ -2112,8 +2105,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
                goto error;
        }
 
-       ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
-                                       rbio->csum_buf, rbio->csum_bitmap, false);
+       ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1,
+                                       rbio->csum_buf, rbio->csum_bitmap);
        if (ret < 0)
                goto error;
        if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits))
@@ -2198,11 +2191,11 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
        while ((bio = bio_list_pop(bio_list))) {
                bio->bi_end_io = raid_wait_write_end_io;
 
-               if (trace_raid56_write_stripe_enabled()) {
+               if (trace_raid56_write_enabled()) {
                        struct raid56_bio_trace_info trace_info = { 0 };
 
                        bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_write_stripe(rbio, bio, &trace_info);
+                       trace_raid56_write(rbio, bio, &trace_info);
                }
                submit_bio(bio);
        }
@@ -2404,7 +2397,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
        return 0;
 }
 
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
 {
        struct btrfs_io_context *bioc = rbio->bioc;
        const u32 sectorsize = bioc->fs_info->sectorsize;
@@ -2445,9 +2438,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
         */
        clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
 
-       if (!need_check)
-               goto writeback;
-
        p_sector.page = alloc_page(GFP_NOFS);
        if (!p_sector.page)
                return -ENOMEM;
@@ -2516,7 +2506,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
                q_sector.page = NULL;
        }
 
-writeback:
        /*
         * time to start writing.  Make bios for everything from the
         * higher layers (the bio_list in our rbio) and our p/q.  Ignore
@@ -2699,7 +2688,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
 
 static void scrub_rbio(struct btrfs_raid_bio *rbio)
 {
-       bool need_check = false;
        int sector_nr;
        int ret;
 
@@ -2722,7 +2710,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
         * We have every sector properly prepared. Can finish the scrub
         * and writeback the good content.
         */
-       ret = finish_parity_scrub(rbio, need_check);
+       ret = finish_parity_scrub(rbio);
        wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
        for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
                int found_errors;
index 0e84c9c..45e6ff7 100644 (file)
@@ -14,7 +14,6 @@ enum btrfs_rbio_ops {
        BTRFS_RBIO_WRITE,
        BTRFS_RBIO_READ_REBUILD,
        BTRFS_RBIO_PARITY_SCRUB,
-       BTRFS_RBIO_REBUILD_MISSING,
 };
 
 struct btrfs_raid_bio {
index 0474bbe..65d2bd6 100644 (file)
@@ -30,8 +30,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 
        inode_inc_iversion(inode);
        if (!no_time_update) {
-               inode->i_mtime = current_time(inode);
-               inode->i_ctime = inode->i_mtime;
+               inode->i_mtime = inode_set_ctime_current(inode);
        }
        /*
         * We round up to the block size at eof when determining which
index 25a3361..9951a0c 100644 (file)
@@ -1916,7 +1916,39 @@ again:
                                err = PTR_ERR(root);
                        break;
                }
-               ASSERT(root->reloc_root == reloc_root);
+
+               if (unlikely(root->reloc_root != reloc_root)) {
+                       if (root->reloc_root) {
+                               btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
+                                         root->root_key.objectid,
+                                         root->reloc_root->root_key.objectid,
+                                         root->reloc_root->root_key.type,
+                                         root->reloc_root->root_key.offset,
+                                         btrfs_root_generation(
+                                                 &root->reloc_root->root_item),
+                                         reloc_root->root_key.objectid,
+                                         reloc_root->root_key.type,
+                                         reloc_root->root_key.offset,
+                                         btrfs_root_generation(
+                                                 &reloc_root->root_item));
+                       } else {
+                               btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
+                                         root->root_key.objectid,
+                                         reloc_root->root_key.objectid,
+                                         reloc_root->root_key.type,
+                                         reloc_root->root_key.offset,
+                                         btrfs_root_generation(
+                                                 &reloc_root->root_item));
+                       }
+                       list_add(&reloc_root->root_list, &reloc_roots);
+                       btrfs_put_root(root);
+                       btrfs_abort_transaction(trans, -EUCLEAN);
+                       if (!err)
+                               err = -EUCLEAN;
+                       break;
+               }
 
                /*
                 * set reference count to 1, so btrfs_recover_relocation
@@ -1989,7 +2021,7 @@ again:
                root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
                                         false);
                if (btrfs_root_refs(&reloc_root->root_item) > 0) {
-                       if (IS_ERR(root)) {
+                       if (WARN_ON(IS_ERR(root))) {
                                /*
                                 * For recovery we read the fs roots on mount,
                                 * and if we didn't find the root then we marked
@@ -1998,17 +2030,14 @@ again:
                                 * memory.  However there's no reason we can't
                                 * handle the error properly here just in case.
                                 */
-                               ASSERT(0);
                                ret = PTR_ERR(root);
                                goto out;
                        }
-                       if (root->reloc_root != reloc_root) {
+                       if (WARN_ON(root->reloc_root != reloc_root)) {
                                /*
-                                * This is actually impossible without something
-                                * going really wrong (like weird race condition
-                                * or cosmic rays).
+                                * This can happen if on-disk metadata has some
+                                * corruption, e.g. bad reloc tree key offset.
                                 */
-                               ASSERT(0);
                                ret = -EINVAL;
                                goto out;
                        }
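
The ASSERT(0) calls become WARN_ON() wrapped directly around the condition: WARN_ON() evaluates to the condition's truth value while emitting a backtrace, so the check and the error path share one statement. A sketch of the idiom (example_check_root() is hypothetical):

    #include <linux/bug.h>
    #include <linux/err.h>

    static int example_check_root(struct btrfs_root *root)
    {
            /* Logs a backtrace when the condition is true, and still
             * returns the condition so it can gate the error path. */
            if (WARN_ON(IS_ERR(root)))
                    return PTR_ERR(root);
            return 0;
    }
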
@@ -2977,9 +3006,6 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
                if (!page)
                        return -ENOMEM;
        }
-       ret = set_page_extent_mapped(page);
-       if (ret < 0)
-               goto release_page;
 
        if (PageReadahead(page))
                page_cache_async_readahead(inode->i_mapping, ra, NULL,
@@ -2995,6 +3021,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
                }
        }
 
+       /*
+        * We could have lost page private when we dropped the lock to read the
+        * page above, make sure we set_page_extent_mapped here so we have any
+        * of the subpage blocksize stuff we need in place.
+        */
+       ret = set_page_extent_mapped(page);
+       if (ret < 0)
+               goto release_page;
+
        page_start = page_offset(page);
        page_end = page_start + PAGE_SIZE - 1;
 
@@ -3221,12 +3256,13 @@ static int add_tree_block(struct reloc_control *rc,
                        if (type == BTRFS_TREE_BLOCK_REF_KEY)
                                owner = btrfs_extent_inline_ref_offset(eb, iref);
                }
-       } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
-               btrfs_print_v0_err(eb->fs_info);
-               btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
-               return -EINVAL;
        } else {
-               BUG();
+               btrfs_print_leaf(eb);
+               btrfs_err(rc->block_group->fs_info,
+                         "unrecognized tree backref at tree block %llu slot %u",
+                         eb->start, path->slots[0]);
+               btrfs_release_path(path);
+               return -EUCLEAN;
        }
 
        btrfs_release_path(path);
@@ -3469,6 +3505,8 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
 
        last = rc->block_group->start + rc->block_group->length;
        while (1) {
+               bool block_found;
+
                cond_resched();
                if (rc->search_start >= last) {
                        ret = 1;
@@ -3519,11 +3557,11 @@ next:
                        goto next;
                }
 
-               ret = find_first_extent_bit(&rc->processed_blocks,
-                                           key.objectid, &start, &end,
-                                           EXTENT_DIRTY, NULL);
+               block_found = find_first_extent_bit(&rc->processed_blocks,
+                                                   key.objectid, &start, &end,
+                                                   EXTENT_DIRTY, NULL);
 
-               if (ret == 0 && start <= key.objectid) {
+               if (block_found && start <= key.objectid) {
                        btrfs_release_path(path);
                        rc->search_start = end + 1;
                } else {
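
find_first_extent_bit() was converted to return a bool this cycle, so the old "ret == 0 means found" convention disappears and callers read naturally, as in the hunk above. A sketch under that assumed prototype (example_skip_processed() is hypothetical):

    /* Assumed prototype after the conversion (extent-io-tree.h):
     *   bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
     *                              u64 *start_ret, u64 *end_ret, u32 bits,
     *                              struct extent_state **cached_state);
     */
    static u64 example_skip_processed(struct reloc_control *rc, u64 objectid)
    {
            u64 start, end;
            bool found;

            found = find_first_extent_bit(&rc->processed_blocks, objectid,
                                          &start, &end, EXTENT_DIRTY, NULL);
            if (found && start <= objectid)
                    return end + 1; /* already processed, skip ahead */
            return objectid;
    }
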
index 4cae41b..b877203 100644 (file)
@@ -43,9 +43,20 @@ struct scrub_ctx;
 /*
  * The following value only influences the performance.
  *
- * This determines the batch size for stripe submitted in one go.
+ * This determines how many stripes are submitted in one go,

+ * which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP).
  */
-#define SCRUB_STRIPES_PER_SCTX 8       /* That would be 8 64K stripe per-device. */
+#define SCRUB_STRIPES_PER_GROUP                8
+
+/*
+ * How many groups we have for each sctx.
+ *
+ * This would be 8M per device, the same value as the old scrub in-flight bios
+ * size limit.
+ */
+#define SCRUB_GROUPS_PER_SCTX          16
+
+#define SCRUB_TOTAL_STRIPES            (SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP)
 
 /*
  * The following value times PAGE_SIZE needs to be large enough to match the
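
With BTRFS_STRIPE_LEN fixed at 64KiB, the sizes quoted in the new comments follow directly; compile-time checks of that arithmetic, assuming the constants above plus the 64KiB BTRFS_STRIPE_LEN from volumes.h and <linux/sizes.h>:

    #include <linux/build_bug.h>
    #include <linux/sizes.h>

    /* 8 stripes x 64KiB = 512KiB submitted per group. */
    static_assert(SCRUB_STRIPES_PER_GROUP * BTRFS_STRIPE_LEN == SZ_512K);

    /* 16 groups x 512KiB = 8MiB in flight per device. */
    static_assert(SCRUB_TOTAL_STRIPES * BTRFS_STRIPE_LEN == SZ_8M);
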
@@ -172,9 +183,11 @@ struct scrub_stripe {
 };
 
 struct scrub_ctx {
-       struct scrub_stripe     stripes[SCRUB_STRIPES_PER_SCTX];
+       struct scrub_stripe     stripes[SCRUB_TOTAL_STRIPES];
        struct scrub_stripe     *raid56_data_stripes;
        struct btrfs_fs_info    *fs_info;
+       struct btrfs_path       extent_path;
+       struct btrfs_path       csum_path;
        int                     first_free;
        int                     cur_stripe;
        atomic_t                cancel_req;
@@ -315,10 +328,10 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
        if (!sctx)
                return;
 
-       for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
+       for (i = 0; i < SCRUB_TOTAL_STRIPES; i++)
                release_scrub_stripe(&sctx->stripes[i]);
 
-       kfree(sctx);
+       kvfree(sctx);
 }
 
 static void scrub_put_ctx(struct scrub_ctx *sctx)
@@ -333,13 +346,20 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
        struct scrub_ctx *sctx;
        int             i;
 
-       sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
+       /*
+        * Since sctx has inline 128 stripes, it can go beyond 64K easily.
+        * Use kvzalloc().
+        */
+       sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL);
        if (!sctx)
                goto nomem;
        refcount_set(&sctx->refs, 1);
        sctx->is_dev_replace = is_dev_replace;
        sctx->fs_info = fs_info;
-       for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
+       sctx->extent_path.search_commit_root = 1;
+       sctx->extent_path.skip_locking = 1;
+       sctx->csum_path.search_commit_root = 1;
+       sctx->csum_path.skip_locking = 1;
+       for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
                int ret;
 
                ret = init_scrub_stripe(fs_info, &sctx->stripes[i]);
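
kvzalloc() tries kmalloc() first and falls back to vmalloc() when the allocation is too large or memory is fragmented, which suits the sctx now that it embeds 128 stripes; the scrub_free_ctx() hunk above switches to kvfree() because plain kfree() cannot release vmalloc memory. A minimal sketch, assuming <linux/slab.h> (example_big_alloc() is hypothetical):

    #include <linux/slab.h>

    static void *example_big_alloc(size_t size)
    {
            /* kmalloc if it fits, transparent vmalloc fallback if not. */
            void *p = kvzalloc(size, GFP_KERNEL);

            /* Must later be released with kvfree(p), never kfree(p). */
            return p;
    }
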
@@ -605,7 +625,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
                              btrfs_stack_header_bytenr(header), logical);
                return;
        }
-       if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) {
+       if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
+                  BTRFS_FSID_SIZE) != 0) {
                bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
                bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
                btrfs_warn_rl(fs_info,
@@ -969,6 +990,9 @@ skip:
        spin_unlock(&sctx->stat_lock);
 }
 
+static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe,
+                               unsigned long write_bitmap, bool dev_replace);
+
 /*
  * The main entrance for all read related scrub work, including:
  *
@@ -977,13 +1001,16 @@ skip:
  * - Go through the remaining mirrors and try to read as large blocksize as
  *   possible
  * - Go through all mirrors (including the failed mirror) sector-by-sector
+ * - Submit writeback for repaired sectors
  *
- * Writeback does not happen here, it needs extra synchronization.
+ * Writeback for dev-replace does not happen here; it needs extra
+ * synchronization for zoned devices.
  */
 static void scrub_stripe_read_repair_worker(struct work_struct *work)
 {
        struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
-       struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+       struct scrub_ctx *sctx = stripe->sctx;
+       struct btrfs_fs_info *fs_info = sctx->fs_info;
        int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
                                          stripe->bg->length);
        int mirror;
@@ -1048,7 +1075,23 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
                        goto out;
        }
 out:
-       scrub_stripe_report_errors(stripe->sctx, stripe);
+       /*
+        * Submit the repaired sectors.  For the zoned case we cannot repair
+        * in-place, but queue the bg to be relocated.
+        */
+       if (btrfs_is_zoned(fs_info)) {
+               if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+                       btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
+       } else if (!sctx->readonly) {
+               unsigned long repaired;
+
+               bitmap_andnot(&repaired, &stripe->init_error_bitmap,
+                             &stripe->error_bitmap, stripe->nr_sectors);
+               scrub_write_sectors(sctx, stripe, repaired, false);
+               wait_scrub_stripe_io(stripe);
+       }
+
+       scrub_stripe_report_errors(sctx, stripe);
        set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
        wake_up(&stripe->repair_wait);
 }
@@ -1261,7 +1304,6 @@ static int get_raid56_logic_offset(u64 physical, int num,
 
                /* Work out the disk rotation on this stripe-set */
                rot = stripe_nr % map->num_stripes;
-               stripe_nr /= map->num_stripes;
                /* calculate which stripe this data locates */
                rot += i;
                stripe_index = rot % map->num_stripes;
@@ -1467,6 +1509,8 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
  * Return <0 for error.
  */
 static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
+                                       struct btrfs_path *extent_path,
+                                       struct btrfs_path *csum_path,
                                        struct btrfs_device *dev, u64 physical,
                                        int mirror_num, u64 logical_start,
                                        u32 logical_len,
@@ -1476,7 +1520,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
        struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
        struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start);
        const u64 logical_end = logical_start + logical_len;
-       struct btrfs_path path = { 0 };
        u64 cur_logical = logical_start;
        u64 stripe_end;
        u64 extent_start;
@@ -1492,14 +1535,13 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
        /* The range must be inside the bg. */
        ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
 
-       path.search_commit_root = 1;
-       path.skip_locking = 1;
-
-       ret = find_first_extent_item(extent_root, &path, logical_start, logical_len);
+       ret = find_first_extent_item(extent_root, extent_path, logical_start,
+                                    logical_len);
        /* Either error or not found. */
        if (ret)
                goto out;
-       get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
+       get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags,
+                       &extent_gen);
        if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                stripe->nr_meta_extents++;
        if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
@@ -1527,7 +1569,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
 
        /* Fill the extent info for the remaining sectors. */
        while (cur_logical <= stripe_end) {
-               ret = find_first_extent_item(extent_root, &path, cur_logical,
+               ret = find_first_extent_item(extent_root, extent_path, cur_logical,
                                             stripe_end - cur_logical + 1);
                if (ret < 0)
                        goto out;
@@ -1535,7 +1577,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
                        ret = 0;
                        break;
                }
-               get_extent_info(&path, &extent_start, &extent_len,
+               get_extent_info(extent_path, &extent_start, &extent_len,
                                &extent_flags, &extent_gen);
                if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
                        stripe->nr_meta_extents++;
@@ -1560,9 +1602,9 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
                 */
                ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
 
-               ret = btrfs_lookup_csums_bitmap(csum_root, stripe->logical,
-                                               stripe_end, stripe->csums,
-                                               &csum_bitmap, true);
+               ret = btrfs_lookup_csums_bitmap(csum_root, csum_path,
+                                               stripe->logical, stripe_end,
+                                               stripe->csums, &csum_bitmap);
                if (ret < 0)
                        goto out;
                if (ret > 0)
@@ -1575,7 +1617,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
        }
        set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
 out:
-       btrfs_release_path(&path);
        return ret;
 }
 
@@ -1653,6 +1694,28 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
        return false;
 }
 
+static void submit_initial_group_read(struct scrub_ctx *sctx,
+                                     unsigned int first_slot,
+                                     unsigned int nr_stripes)
+{
+       struct blk_plug plug;
+
+       ASSERT(first_slot < SCRUB_TOTAL_STRIPES);
+       ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES);
+
+       scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
+                             btrfs_stripe_nr_to_offset(nr_stripes));
+       blk_start_plug(&plug);
+       for (int i = 0; i < nr_stripes; i++) {
+               struct scrub_stripe *stripe = &sctx->stripes[first_slot + i];
+
+               /* Those stripes should be initialized. */
+               ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
+               scrub_submit_initial_read(sctx, stripe);
+       }
+       blk_finish_plug(&plug);
+}
+
 static int flush_scrub_stripes(struct scrub_ctx *sctx)
 {
        struct btrfs_fs_info *fs_info = sctx->fs_info;
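
submit_initial_group_read() above brackets one group's reads in a block plug so the bios of a 512KiB group can be merged before reaching the device queue. A usage sketch of the plugging API, assuming <linux/blkdev.h> (example_submit_all() is hypothetical):

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    static void example_submit_all(struct bio **bios, int nr)
    {
            struct blk_plug plug;

            blk_start_plug(&plug);  /* hold back and merge bios */
            for (int i = 0; i < nr; i++)
                    submit_bio(bios[i]);
            blk_finish_plug(&plug); /* flush to the device queue */
    }
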
@@ -1665,11 +1728,11 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
 
        ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
 
-       scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
-                             btrfs_stripe_nr_to_offset(nr_stripes));
-       for (int i = 0; i < nr_stripes; i++) {
-               stripe = &sctx->stripes[i];
-               scrub_submit_initial_read(sctx, stripe);
+       /* Submit the stripes which are populated but not submitted. */
+       if (nr_stripes % SCRUB_STRIPES_PER_GROUP) {
+               const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP);
+
+               submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot);
        }
 
        for (int i = 0; i < nr_stripes; i++) {
@@ -1679,32 +1742,6 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
                           test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
        }
 
-       /*
-        * Submit the repaired sectors.  For zoned case, we cannot do repair
-        * in-place, but queue the bg to be relocated.
-        */
-       if (btrfs_is_zoned(fs_info)) {
-               for (int i = 0; i < nr_stripes; i++) {
-                       stripe = &sctx->stripes[i];
-
-                       if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) {
-                               btrfs_repair_one_zone(fs_info,
-                                                     sctx->stripes[0].bg->start);
-                               break;
-                       }
-               }
-       } else if (!sctx->readonly) {
-               for (int i = 0; i < nr_stripes; i++) {
-                       unsigned long repaired;
-
-                       stripe = &sctx->stripes[i];
-
-                       bitmap_andnot(&repaired, &stripe->init_error_bitmap,
-                                     &stripe->error_bitmap, stripe->nr_sectors);
-                       scrub_write_sectors(sctx, stripe, repaired, false);
-               }
-       }
-
        /* Submit for dev-replace. */
        if (sctx->is_dev_replace) {
                /*
@@ -1749,28 +1786,40 @@ static void raid56_scrub_wait_endio(struct bio *bio)
 
 static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
                              struct btrfs_device *dev, int mirror_num,
-                             u64 logical, u32 length, u64 physical)
+                             u64 logical, u32 length, u64 physical,
+                             u64 *found_logical_ret)
 {
        struct scrub_stripe *stripe;
        int ret;
 
-       /* No available slot, submit all stripes and wait for them. */
-       if (sctx->cur_stripe >= SCRUB_STRIPES_PER_SCTX) {
-               ret = flush_scrub_stripes(sctx);
-               if (ret < 0)
-                       return ret;
-       }
+       /*
+        * There should always be one slot left, as the caller that fills
+        * the last slot is expected to flush them all.
+        */
+       ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
 
        stripe = &sctx->stripes[sctx->cur_stripe];
-
-       /* We can queue one stripe using the remaining slot. */
        scrub_reset_stripe(stripe);
-       ret = scrub_find_fill_first_stripe(bg, dev, physical, mirror_num,
-                                          logical, length, stripe);
+       ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
+                                          &sctx->csum_path, dev, physical,
+                                          mirror_num, logical, length, stripe);
        /* Either >0 as no more extents or <0 for error. */
        if (ret)
                return ret;
+       if (found_logical_ret)
+               *found_logical_ret = stripe->logical;
        sctx->cur_stripe++;
+
+       /* We filled one group, submit it. */
+       if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) {
+               const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP;
+
+               submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP);
+       }
+
+       /* Last slot used, flush them all. */
+       if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES)
+               return flush_scrub_stripes(sctx);
        return 0;
 }
 
@@ -1784,6 +1833,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
        struct btrfs_fs_info *fs_info = sctx->fs_info;
        struct btrfs_raid_bio *rbio;
        struct btrfs_io_context *bioc = NULL;
+       struct btrfs_path extent_path = { 0 };
+       struct btrfs_path csum_path = { 0 };
        struct bio *bio;
        struct scrub_stripe *stripe;
        bool all_empty = true;
@@ -1794,6 +1845,16 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 
        ASSERT(sctx->raid56_data_stripes);
 
+       /*
+        * For data stripe search, we cannot re-use the same extent/csum paths,
+        * as the data stripe bytenr may be smaller than the previous
+        * extent's.  Thus we have to use our own extent/csum paths.
+        */
+       extent_path.search_commit_root = 1;
+       extent_path.skip_locking = 1;
+       csum_path.search_commit_root = 1;
+       csum_path.skip_locking = 1;
+
        for (int i = 0; i < data_stripes; i++) {
                int stripe_index;
                int rot;
@@ -1808,7 +1869,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
 
                scrub_reset_stripe(stripe);
                set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
-               ret = scrub_find_fill_first_stripe(bg,
+               ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path,
                                map->stripes[stripe_index].dev, physical, 1,
                                full_stripe_start + btrfs_stripe_nr_to_offset(i),
                                BTRFS_STRIPE_LEN, stripe);
@@ -1853,24 +1914,6 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
        /* For now, no zoned support for RAID56. */
        ASSERT(!btrfs_is_zoned(sctx->fs_info));
 
-       /* Writeback for the repaired sectors. */
-       for (int i = 0; i < data_stripes; i++) {
-               unsigned long repaired;
-
-               stripe = &sctx->raid56_data_stripes[i];
-
-               bitmap_andnot(&repaired, &stripe->init_error_bitmap,
-                             &stripe->error_bitmap, stripe->nr_sectors);
-               scrub_write_sectors(sctx, stripe, repaired, false);
-       }
-
-       /* Wait for the above writebacks to finish. */
-       for (int i = 0; i < data_stripes; i++) {
-               stripe = &sctx->raid56_data_stripes[i];
-
-               wait_scrub_stripe_io(stripe);
-       }
-
        /*
         * Now all data stripes are properly verified. Check if we have any
         * unrepaired, if so abort immediately or we could further corrupt the
@@ -1936,6 +1979,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
        bio_put(bio);
        btrfs_bio_counter_dec(fs_info);
 
+       btrfs_release_path(&extent_path);
+       btrfs_release_path(&csum_path);
 out:
        return ret;
 }
@@ -1957,18 +2002,15 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 {
        struct btrfs_fs_info *fs_info = sctx->fs_info;
        const u64 logical_end = logical_start + logical_length;
-       /* An artificial limit, inherit from old scrub behavior */
-       struct btrfs_path path = { 0 };
        u64 cur_logical = logical_start;
        int ret;
 
        /* The range must be inside the bg */
        ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
 
-       path.search_commit_root = 1;
-       path.skip_locking = 1;
        /* Go through each extent items inside the logical range */
        while (cur_logical < logical_end) {
+               u64 found_logical;
                u64 cur_physical = physical + cur_logical - logical_start;
 
                /* Canceled? */
@@ -1993,7 +2035,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
 
                ret = queue_scrub_stripe(sctx, bg, device, mirror_num,
                                         cur_logical, logical_end - cur_logical,
-                                        cur_physical);
+                                        cur_physical, &found_logical);
                if (ret > 0) {
                        /* No more extent, just update the accounting */
                        sctx->stat.last_physical = physical + logical_length;
@@ -2003,14 +2045,11 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
                if (ret < 0)
                        break;
 
-               ASSERT(sctx->cur_stripe > 0);
-               cur_logical = sctx->stripes[sctx->cur_stripe - 1].logical
-                             + BTRFS_STRIPE_LEN;
+               cur_logical = found_logical + BTRFS_STRIPE_LEN;
 
                /* Don't hold CPU for too long time */
                cond_resched();
        }
-       btrfs_release_path(&path);
        return ret;
 }
 
@@ -2108,6 +2147,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        u64 stripe_logical;
        int stop_loop = 0;
 
+       /* The extent_path should have been released by now. */
+       ASSERT(sctx->extent_path.nodes[0] == NULL);
+
        scrub_blocked_if_needed(fs_info);
 
        if (sctx->is_dev_replace &&
@@ -2226,6 +2268,9 @@ out:
        ret2 = flush_scrub_stripes(sctx);
        if (!ret)
                ret = ret2;
+       btrfs_release_path(&sctx->extent_path);
+       btrfs_release_path(&sctx->csum_path);
+
        if (sctx->raid56_data_stripes) {
                for (int i = 0; i < nr_data_stripes(map); i++)
                        release_scrub_stripe(&sctx->raid56_data_stripes[i]);
@@ -2710,8 +2755,7 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
 /*
  * get a reference count on fs_info->scrub_workers. start worker if necessary
  */
-static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
-                                               int is_dev_replace)
+static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
 {
        struct workqueue_struct *scrub_workers = NULL;
        unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
@@ -2721,10 +2765,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
        if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
                return 0;
 
-       if (is_dev_replace)
-               scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
-       else
-               scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
+       scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
        if (!scrub_workers)
                return -ENOMEM;
 
@@ -2776,7 +2817,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
        if (IS_ERR(sctx))
                return PTR_ERR(sctx);
 
-       ret = scrub_workers_get(fs_info, is_dev_replace);
+       ret = scrub_workers_get(fs_info);
        if (ret)
                goto out_free_ctx;
 
index 8bfd447..3a56615 100644 (file)
@@ -3685,7 +3685,7 @@ static void tail_append_pending_moves(struct send_ctx *sctx,
 static int apply_children_dir_moves(struct send_ctx *sctx)
 {
        struct pending_dir_move *pm;
-       struct list_head stack;
+       LIST_HEAD(stack);
        u64 parent_ino = sctx->cur_ino;
        int ret = 0;
 
@@ -3693,7 +3693,6 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
        if (!pm)
                return 0;
 
-       INIT_LIST_HEAD(&stack);
        tail_append_pending_moves(sctx, pm, &stack);
 
        while (!list_empty(&stack)) {
@@ -4165,7 +4164,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
        int ret = 0;
        struct recorded_ref *cur;
        struct recorded_ref *cur2;
-       struct list_head check_dirs;
+       LIST_HEAD(check_dirs);
        struct fs_path *valid_path = NULL;
        u64 ow_inode = 0;
        u64 ow_gen;
@@ -4184,7 +4183,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
         * which is always '..'
         */
        BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
-       INIT_LIST_HEAD(&check_dirs);
 
        valid_path = fs_path_alloc();
        if (!valid_path) {
index 75e7fa3..d7e8cd4 100644 (file)
@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
                return 0;
 
        used = btrfs_space_info_used(space_info, true);
-       if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
-           (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
-               avail = 0;
-       else
-               avail = calc_available_free_space(fs_info, space_info, flush);
+       avail = calc_available_free_space(fs_info, space_info, flush);
 
        if (used + bytes < space_info->total_bytes + avail)
                return 1;
@@ -510,6 +506,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
                           int dump_block_groups)
 {
        struct btrfs_block_group *cache;
+       u64 total_avail = 0;
        int index = 0;
 
        spin_lock(&info->lock);
@@ -523,18 +520,27 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
        down_read(&info->groups_sem);
 again:
        list_for_each_entry(cache, &info->block_groups[index], list) {
+               u64 avail;
+
                spin_lock(&cache->lock);
+               avail = cache->length - cache->used - cache->pinned -
+                       cache->reserved - cache->delalloc_bytes -
+                       cache->bytes_super - cache->zone_unusable;
                btrfs_info(fs_info,
-                       "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
-                       cache->start, cache->length, cache->used, cache->pinned,
-                       cache->reserved, cache->zone_unusable,
-                       cache->ro ? "[readonly]" : "");
+"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
+                          cache->start, cache->length, cache->used, cache->pinned,
+                          cache->reserved, cache->delalloc_bytes,
+                          cache->bytes_super, cache->zone_unusable,
+                          avail, cache->ro ? "[readonly]" : "");
                spin_unlock(&cache->lock);
                btrfs_dump_free_space(cache, bytes);
+               total_avail += avail;
        }
        if (++index < BTRFS_NR_RAID_TYPES)
                goto again;
        up_read(&info->groups_sem);
+
+       btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
 }
 
 static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
@@ -715,9 +721,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                else
                        nr = -1;
 
-               trans = btrfs_join_transaction(root);
+               trans = btrfs_join_transaction_nostart(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
+                       if (ret == -ENOENT)
+                               ret = 0;
                        break;
                }
                ret = btrfs_run_delayed_items_nr(trans, nr);
@@ -733,9 +741,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                break;
        case FLUSH_DELAYED_REFS_NR:
        case FLUSH_DELAYED_REFS:
-               trans = btrfs_join_transaction(root);
+               trans = btrfs_join_transaction_nostart(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
+                       if (ret == -ENOENT)
+                               ret = 0;
                        break;
                }
                if (state == FLUSH_DELAYED_REFS_NR)
@@ -747,18 +757,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                break;
        case ALLOC_CHUNK:
        case ALLOC_CHUNK_FORCE:
-               /*
-                * For metadata space on zoned filesystem, reaching here means we
-                * don't have enough space left in active_total_bytes. Try to
-                * activate a block group first, because we may have inactive
-                * block group already allocated.
-                */
-               ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
-               if (ret < 0)
-                       break;
-               else if (ret == 1)
-                       break;
-
                trans = btrfs_join_transaction(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
@@ -770,22 +768,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                                        CHUNK_ALLOC_FORCE);
                btrfs_end_transaction(trans);
 
-               /*
-                * For metadata space on zoned filesystem, allocating a new chunk
-                * is not enough. We still need to activate the block * group.
-                * Active the newly allocated block group by (maybe) finishing
-                * a block group.
-                */
-               if (ret == 1) {
-                       ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
-                       /*
-                        * Revert to the original ret regardless we could finish
-                        * one block group or not.
-                        */
-                       if (ret >= 0)
-                               ret = 1;
-               }
-
                if (ret > 0 || ret == -ENOSPC)
                        ret = 0;
                break;
@@ -800,9 +782,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
                break;
        case COMMIT_TRANS:
                ASSERT(current->journal_info == NULL);
-               trans = btrfs_join_transaction(root);
+               /*
+                * We don't want to start a new transaction, just attach to the
+                * current one or wait for it to fully commit in case its commit is
+                * happening at the moment. Note: we don't use a nostart join
+                * because that does not wait for a transaction to fully commit
+                * (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
+                */
+               trans = btrfs_attach_transaction_barrier(root);
                if (IS_ERR(trans)) {
                        ret = PTR_ERR(trans);
+                       if (ret == -ENOENT)
+                               ret = 0;
                        break;
                }
                ret = btrfs_commit_transaction(trans);
@@ -1408,8 +1399,18 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
                }
        }
 
-       /* Attempt to steal from the global rsv if we can. */
-       if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
+       /*
+        * Attempt to steal from the global rsv if we can, except if the fs was
+        * turned into error mode due to a transaction abort when flushing space
+        * above; in that case fail with the abort error instead of returning
+        * success to the caller if we can steal from the global rsv - this is
+        * just to have the caller fail immediately instead of later when
+        * trying to modify the fs, making it easier to debug -ENOSPC problems.
+        */
+       if (BTRFS_FS_ERROR(fs_info)) {
+               ticket->error = BTRFS_FS_ERROR(fs_info);
+               remove_ticket(space_info, ticket);
+       } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
                ticket->error = -ENOSPC;
                remove_ticket(space_info, ticket);
        }
index f1dd172..09bfe68 100644 (file)
@@ -709,12 +709,16 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                        break;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
                case Opt_check_integrity_including_extent_data:
+                       btrfs_warn(info,
+       "integrity checker is deprecated and will be removed in 6.7");
                        btrfs_info(info,
                                   "enabling check integrity including extent data");
                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
                        break;
                case Opt_check_integrity:
+                       btrfs_warn(info,
+       "integrity checker is deprecated and will be removed in 6.7");
                        btrfs_info(info, "enabling check integrity");
                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
                        break;
@@ -727,6 +731,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                                goto out;
                        }
                        info->check_integrity_print_mask = intarg;
+                       btrfs_warn(info,
+       "integrity checker is deprecated and will be removed in 6.7");
                        btrfs_info(info, "check_integrity_print_mask 0x%x",
                                   info->check_integrity_print_mask);
                        break;
@@ -2144,7 +2150,7 @@ static struct file_system_type btrfs_fs_type = {
        .name           = "btrfs",
        .mount          = btrfs_mount,
        .kill_sb        = btrfs_kill_super,
-       .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+       .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME,
 };
 
 static struct file_system_type btrfs_root_fs_type = {
@@ -2152,7 +2158,8 @@ static struct file_system_type btrfs_root_fs_type = {
        .name           = "btrfs",
        .mount          = btrfs_mount_root,
        .kill_sb        = btrfs_kill_super,
-       .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
+       .fs_flags       = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA |
+                         FS_ALLOW_IDMAP | FS_MGTIME,
 };
 
 MODULE_ALIAS_FS("btrfs");
index 25294e6..b1d1ac2 100644 (file)
@@ -414,6 +414,12 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
 BTRFS_ATTR(static_feature, supported_sectorsizes,
           supported_sectorsizes_show);
 
+static ssize_t acl_show(struct kobject *kobj, struct kobj_attribute *a, char *buf)
+{
+       return sysfs_emit(buf, "%d\n", !!IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
+}
+BTRFS_ATTR(static_feature, acl, acl_show);
+
 /*
  * Features which only depend on kernel version.
  *
@@ -421,6 +427,7 @@ BTRFS_ATTR(static_feature, supported_sectorsizes,
  * btrfs_supported_feature_attrs.
  */
 static struct attribute *btrfs_supported_static_feature_attrs[] = {
+       BTRFS_ATTR_PTR(static_feature, acl),
        BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
        BTRFS_ATTR_PTR(static_feature, supported_checksums),
        BTRFS_ATTR_PTR(static_feature, send_stream_version),
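
The new acl attribute joins the other static features exported under /sys/fs/btrfs/features/, letting tools probe whether CONFIG_BTRFS_FS_POSIX_ACL was compiled in without attempting a mount. A userspace sketch (the path is assumed from the existing static_feature group):

    /* Userspace, not kernel code. */
    #include <stdio.h>

    int main(void)
    {
            char buf[4] = "";
            FILE *f = fopen("/sys/fs/btrfs/features/acl", "r");

            if (!f)
                    return 1; /* kernel predates this attribute */
            if (fgets(buf, sizeof(buf), f))
                    printf("acl compiled in: %s", buf); /* "1\n" or "0\n" */
            fclose(f);
            return 0;
    }
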
index f6bc6d7..1cc86af 100644 (file)
@@ -319,86 +319,139 @@ out:
        return ret;
 }
 
-static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
-                          unsigned long len)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
 {
        unsigned long i;
 
-       for (i = 0; i < len * BITS_PER_BYTE; i++) {
+       for (i = 0; i < eb->len * BITS_PER_BYTE; i++) {
                int bit, bit1;
 
                bit = !!test_bit(i, bitmap);
                bit1 = !!extent_buffer_test_bit(eb, 0, i);
                if (bit1 != bit) {
-                       test_err("bits do not match");
+                       u8 has;
+                       u8 expect;
+
+                       read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+                       expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+                       test_err(
+               "bits do not match, start byte 0 bit %lu, byte %lu has 0x%02x expect 0x%02x",
+                                i, i / BITS_PER_BYTE, has, expect);
                        return -EINVAL;
                }
 
                bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
                                                i % BITS_PER_BYTE);
                if (bit1 != bit) {
-                       test_err("offset bits do not match");
+                       u8 has;
+                       u8 expect;
+
+                       read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+                       expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+                       test_err(
+               "bits do not match, start byte %lu bit %lu, byte %lu has 0x%02x expect 0x%02x",
+                                i / BITS_PER_BYTE, i % BITS_PER_BYTE,
+                                i / BITS_PER_BYTE, has, expect);
                        return -EINVAL;
                }
        }
        return 0;
 }
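On a mismatch the rewritten checker reports both views of the offending bit:
the absolute bit number and the (byte, bit-in-byte) pair, plus the byte as
stored in the eb and in the reference bitmap. The index arithmetic it relies
on, as a standalone sketch (plain C, with BITS_PER_BYTE fixed at 8 as in the
kernel):

#include <stdio.h>
#include <stdint.h>

#define BITS_PER_BYTE 8

int main(void)
{
	uint8_t bitmap[2] = { 0x5a, 0xa5 };
	unsigned long i = 11;                      /* absolute bit index */
	unsigned long byte = i / BITS_PER_BYTE;    /* which byte holds it */
	unsigned long bit = i % BITS_PER_BYTE;     /* position in that byte */
	int set = (bitmap[byte] >> bit) & 1;

	/* Same decomposition as extent_buffer_test_bit(eb, i / 8, i % 8). */
	printf("bit %lu = (byte %lu, bit %lu) -> %d, byte 0x%02x\n",
	       i, byte, bit, set, bitmap[byte]);
	return 0;
}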
 
-static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
-                            unsigned long len)
+static int test_bitmap_set(const char *name, unsigned long *bitmap,
+                          struct extent_buffer *eb,
+                          unsigned long byte_start, unsigned long bit_start,
+                          unsigned long bit_len)
+{
+       int ret;
+
+       bitmap_set(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+       extent_buffer_bitmap_set(eb, byte_start, bit_start, bit_len);
+       ret = check_eb_bitmap(bitmap, eb);
+       if (ret < 0)
+               test_err("%s test failed", name);
+       return ret;
+}
+
+static int test_bitmap_clear(const char *name, unsigned long *bitmap,
+                            struct extent_buffer *eb,
+                            unsigned long byte_start, unsigned long bit_start,
+                            unsigned long bit_len)
+{
+       int ret;
+
+       bitmap_clear(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+       extent_buffer_bitmap_clear(eb, byte_start, bit_start, bit_len);
+       ret = check_eb_bitmap(bitmap, eb);
+       if (ret < 0)
+               test_err("%s test failed", name);
+       return ret;
+}
+
+static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb)
 {
        unsigned long i, j;
+       unsigned long byte_len = eb->len;
        u32 x;
        int ret;
 
-       memset(bitmap, 0, len);
-       memzero_extent_buffer(eb, 0, len);
-       if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
-               test_err("bitmap was not zeroed");
-               return -EINVAL;
-       }
+       ret = test_bitmap_clear("clear all run 1", bitmap, eb, 0, 0,
+                               byte_len * BITS_PER_BYTE);
+       if (ret < 0)
+               return ret;
 
-       bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
-       extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-       ret = check_eb_bitmap(bitmap, eb, len);
-       if (ret) {
-               test_err("setting all bits failed");
+       ret = test_bitmap_set("set all", bitmap, eb, 0, 0, byte_len * BITS_PER_BYTE);
+       if (ret < 0)
                return ret;
-       }
 
-       bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
-       extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
-       ret = check_eb_bitmap(bitmap, eb, len);
-       if (ret) {
-               test_err("clearing all bits failed");
+       ret = test_bitmap_clear("clear all run 2", bitmap, eb, 0, 0,
+                               byte_len * BITS_PER_BYTE);
+       if (ret < 0)
+               return ret;
+
+       ret = test_bitmap_set("same byte set", bitmap, eb, 0, 2, 4);
+       if (ret < 0)
+               return ret;
+
+       ret = test_bitmap_clear("same byte partial clear", bitmap, eb, 0, 4, 1);
+       if (ret < 0)
+               return ret;
+
+       ret = test_bitmap_set("cross byte set", bitmap, eb, 2, 4, 8);
+       if (ret < 0)
+               return ret;
+
+       ret = test_bitmap_set("cross multi byte set", bitmap, eb, 4, 4, 24);
+       if (ret < 0)
+               return ret;
+
+       ret = test_bitmap_clear("cross byte clear", bitmap, eb, 2, 6, 4);
+       if (ret < 0)
+               return ret;
+
+       ret = test_bitmap_clear("cross multi byte clear", bitmap, eb, 4, 6, 20);
+       if (ret < 0)
                return ret;
-       }
 
        /* Straddling pages test */
-       if (len > PAGE_SIZE) {
-               bitmap_set(bitmap,
-                       (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
-                       sizeof(long) * BITS_PER_BYTE);
-               extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
-                                       sizeof(long) * BITS_PER_BYTE);
-               ret = check_eb_bitmap(bitmap, eb, len);
-               if (ret) {
-                       test_err("setting straddling pages failed");
+       if (byte_len > PAGE_SIZE) {
+               ret = test_bitmap_set("cross page set", bitmap, eb,
+                                     PAGE_SIZE - sizeof(long) / 2, 0,
+                                     sizeof(long) * BITS_PER_BYTE);
+               if (ret < 0)
+                       return ret;
+
+               ret = test_bitmap_set("cross page set all", bitmap, eb, 0, 0,
+                                     byte_len * BITS_PER_BYTE);
+               if (ret < 0)
                        return ret;
-               }
 
-               bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
-               bitmap_clear(bitmap,
-                       (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
-                       sizeof(long) * BITS_PER_BYTE);
-               extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
-               extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+               ret = test_bitmap_clear("cross page clear", bitmap, eb,
+                                       PAGE_SIZE - sizeof(long) / 2, 0,
                                        sizeof(long) * BITS_PER_BYTE);
-               ret = check_eb_bitmap(bitmap, eb, len);
-               if (ret) {
-                       test_err("clearing straddling pages failed");
+               if (ret < 0)
                        return ret;
-               }
        }
 
        /*
@@ -406,9 +459,12 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
         * something repetitive that could miss some hypothetical off-by-n bug.
         */
        x = 0;
-       bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
-       extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
-       for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+       ret = test_bitmap_clear("clear all run 3", bitmap, eb, 0, 0,
+                               byte_len * BITS_PER_BYTE);
+       if (ret < 0)
+               return ret;
+
+       for (i = 0; i < byte_len * BITS_PER_BYTE / 32; i++) {
                x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
                for (j = 0; j < 32; j++) {
                        if (x & (1U << j)) {
@@ -418,7 +474,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
                }
        }
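The fill loop above is a 32-bit linear congruential generator; 0x19660d
(1664525) and 0x3c6ef35f (1013904223) are the well-known Numerical Recipes
multiplier and increment, so the "random" pattern is deterministic and any
failure reproduces with the same bits. The generator on its own:

#include <stdio.h>
#include <stdint.h>

/* x' = (1664525 * x + 1013904223) mod 2^32, as used by the test. */
static uint32_t lcg_next(uint32_t x)
{
	return (uint32_t)(0x19660dULL * (uint64_t)x + 0x3c6ef35fULL);
}

int main(void)
{
	uint32_t x = 0;

	for (int i = 0; i < 4; i++) {
		x = lcg_next(x);
		printf("word %d: 0x%08x\n", i, x);
	}
	return 0;
}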
 
-       ret = check_eb_bitmap(bitmap, eb, len);
+       ret = check_eb_bitmap(bitmap, eb);
        if (ret) {
                test_err("random bit pattern failed");
                return ret;
@@ -456,7 +512,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
                goto out;
        }
 
-       ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+       ret = __test_eb_bitmaps(bitmap, eb);
        if (ret)
                goto out;
 
@@ -473,7 +529,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
                goto out;
        }
 
-       ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+       ret = __test_eb_bitmaps(bitmap, eb);
 out:
        free_extent_buffer(eb);
        kfree(bitmap);
@@ -592,6 +648,146 @@ out:
        return ret;
 }
 
+static void dump_eb_and_memory_contents(struct extent_buffer *eb, void *memory,
+                                       const char *test_name)
+{
+       for (int i = 0; i < eb->len; i++) {
+               struct page *page = eb->pages[i >> PAGE_SHIFT];
+               void *addr = page_address(page) + offset_in_page(i);
+
+               if (memcmp(addr, memory + i, 1) != 0) {
+                       test_err("%s failed", test_name);
+                       test_err("eb and memory diffs at byte %u, eb has 0x%02x memory has 0x%02x",
+                                i, *(u8 *)addr, *(u8 *)(memory + i));
+                       return;
+               }
+       }
+}
+
+static int verify_eb_and_memory(struct extent_buffer *eb, void *memory,
+                               const char *test_name)
+{
+       for (int i = 0; i < (eb->len >> PAGE_SHIFT); i++) {
+               void *eb_addr = page_address(eb->pages[i]);
+
+               if (memcmp(memory + (i << PAGE_SHIFT), eb_addr, PAGE_SIZE) != 0) {
+                       dump_eb_and_memory_contents(eb, memory, test_name);
+                       return -EUCLEAN;
+               }
+       }
+       return 0;
+}
+
+/*
+ * Init both memory and extent buffer contents to the same randomly generated
+ * contents.
+ */
+static void init_eb_and_memory(struct extent_buffer *eb, void *memory)
+{
+       get_random_bytes(memory, eb->len);
+       write_extent_buffer(eb, memory, 0, eb->len);
+}
+
+static int test_eb_mem_ops(u32 sectorsize, u32 nodesize)
+{
+       struct btrfs_fs_info *fs_info;
+       struct extent_buffer *eb = NULL;
+       void *memory = NULL;
+       int ret;
+
+       test_msg("running extent buffer memory operation tests");
+
+       fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
+       if (!fs_info) {
+               test_std_err(TEST_ALLOC_FS_INFO);
+               return -ENOMEM;
+       }
+
+       memory = kvzalloc(nodesize, GFP_KERNEL);
+       if (!memory) {
+               test_err("failed to allocate memory");
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       eb = __alloc_dummy_extent_buffer(fs_info, SZ_1M, nodesize);
+       if (!eb) {
+               test_std_err(TEST_ALLOC_EXTENT_BUFFER);
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       init_eb_and_memory(eb, memory);
+       ret = verify_eb_and_memory(eb, memory, "full eb write");
+       if (ret < 0)
+               goto out;
+
+       memcpy(memory, memory + 16, 16);
+       memcpy_extent_buffer(eb, 0, 16, 16);
+       ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 1");
+       if (ret < 0)
+               goto out;
+
+       memcpy(memory, memory + 2048, 16);
+       memcpy_extent_buffer(eb, 0, 2048, 16);
+       ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 2");
+       if (ret < 0)
+               goto out;
+       memcpy(memory, memory + 2048, 2048);
+       memcpy_extent_buffer(eb, 0, 2048, 2048);
+       ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 3");
+       if (ret < 0)
+               goto out;
+
+       memmove(memory + 512, memory + 256, 512);
+       memmove_extent_buffer(eb, 512, 256, 512);
+       ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 1");
+       if (ret < 0)
+               goto out;
+
+       memmove(memory + 2048, memory + 512, 2048);
+       memmove_extent_buffer(eb, 2048, 512, 2048);
+       ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 2");
+       if (ret < 0)
+               goto out;
+       memmove(memory + 512, memory + 2048, 2048);
+       memmove_extent_buffer(eb, 512, 2048, 2048);
+       ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 3");
+       if (ret < 0)
+               goto out;
+
+       if (nodesize > PAGE_SIZE) {
+               memcpy(memory, memory + 4096 - 128, 256);
+               memcpy_extent_buffer(eb, 0, 4096 - 128, 256);
+               ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 1");
+               if (ret < 0)
+                       goto out;
+
+               memcpy(memory + 4096 - 128, memory + 4096 + 128, 256);
+               memcpy_extent_buffer(eb, 4096 - 128, 4096 + 128, 256);
+               ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 2");
+               if (ret < 0)
+                       goto out;
+
+               memmove(memory + 4096 - 128, memory + 4096 - 64, 256);
+               memmove_extent_buffer(eb, 4096 - 128, 4096 - 64, 256);
+               ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 1");
+               if (ret < 0)
+                       goto out;
+
+               memmove(memory + 4096 - 64, memory + 4096 - 128, 256);
+               memmove_extent_buffer(eb, 4096 - 64, 4096 - 128, 256);
+               ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 2");
+               if (ret < 0)
+                       goto out;
+       }
+out:
+       free_extent_buffer(eb);
+       kvfree(memory);
+       btrfs_free_dummy_fs_info(fs_info);
+       return ret;
+}
+
 int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
 {
        int ret;
@@ -607,6 +803,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
                goto out;
 
        ret = test_eb_bitmaps(sectorsize, nodesize);
+       if (ret)
+               goto out;
+
+       ret = test_eb_mem_ops(sectorsize, nodesize);
 out:
        return ret;
 }
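Several of the new cases hand memcpy_extent_buffer() and
memmove_extent_buffer() overlapping ranges, which is why the reference
operation switches from memcpy() to memmove(): only the latter is defined for
overlap. The distinction in miniature:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[16] = "0123456789abcde";

	/*
	 * Shift 8 bytes forward by 2 inside the same buffer. Source
	 * [0, 8) and destination [2, 10) overlap, so memcpy() would be
	 * undefined behavior here; memmove() copies as if through a
	 * temporary buffer, which is the contract the eb helpers need.
	 */
	memmove(buf + 2, buf, 8);
	printf("%s\n", buf); /* prints "0101234567abcde" */
	return 0;
}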
index ed0f36a..29bdd08 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../btrfs_inode.h"
 #include "../volumes.h"
 #include "../disk-io.h"
 #include "../block-group.h"
@@ -442,6 +443,406 @@ static int test_case_4(struct btrfs_fs_info *fs_info,
        return ret;
 }
 
+static int add_compressed_extent(struct extent_map_tree *em_tree,
+                                u64 start, u64 len, u64 block_start)
+{
+       struct extent_map *em;
+       int ret;
+
+       em = alloc_extent_map();
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               return -ENOMEM;
+       }
+
+       em->start = start;
+       em->len = len;
+       em->block_start = block_start;
+       em->block_len = SZ_4K;
+       set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+       write_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em, 0);
+       write_unlock(&em_tree->lock);
+       free_extent_map(em);
+       if (ret < 0) {
+               test_err("cannot add extent map [%llu, %llu)", start, start + len);
+               return ret;
+       }
+
+       return 0;
+}
+
+struct extent_range {
+       u64 start;
+       u64 len;
+};
+
+/* The valid states of the tree after every drop, as described below. */
+struct extent_range valid_ranges[][7] = {
+       {
+         { .start = 0,                 .len = SZ_8K },         /* [0, 8K) */
+         { .start = SZ_4K * 3,         .len = SZ_4K * 3},      /* [12k, 24k) */
+         { .start = SZ_4K * 6,         .len = SZ_4K * 3},      /* [24k, 36k) */
+         { .start = SZ_32K + SZ_4K,    .len = SZ_4K},          /* [36k, 40k) */
+         { .start = SZ_4K * 10,        .len = SZ_4K * 6},      /* [40k, 64k) */
+       },
+       {
+         { .start = 0,                 .len = SZ_8K },         /* [0, 8K) */
+         { .start = SZ_4K * 5,         .len = SZ_4K},          /* [20k, 24k) */
+         { .start = SZ_4K * 6,         .len = SZ_4K * 3},      /* [24k, 36k) */
+         { .start = SZ_32K + SZ_4K,    .len = SZ_4K},          /* [36k, 40k) */
+         { .start = SZ_4K * 10,        .len = SZ_4K * 6},      /* [40k, 64k) */
+       },
+       {
+         { .start = 0,                 .len = SZ_8K },         /* [0, 8K) */
+         { .start = SZ_4K * 5,         .len = SZ_4K},          /* [20k, 24k) */
+         { .start = SZ_4K * 6,         .len = SZ_4K},          /* [24k, 28k) */
+         { .start = SZ_32K,            .len = SZ_4K},          /* [32k, 36k) */
+         { .start = SZ_32K + SZ_4K,    .len = SZ_4K},          /* [36k, 40k) */
+         { .start = SZ_4K * 10,        .len = SZ_4K * 6},      /* [40k, 64k) */
+       },
+       {
+         { .start = 0,                 .len = SZ_8K},          /* [0, 8K) */
+         { .start = SZ_4K * 5,         .len = SZ_4K},          /* [20k, 24k) */
+         { .start = SZ_4K * 6,         .len = SZ_4K},          /* [24k, 28k) */
+       }
+};
+
+static int validate_range(struct extent_map_tree *em_tree, int index)
+{
+       struct rb_node *n;
+       int i;
+
+       for (i = 0, n = rb_first_cached(&em_tree->map);
+            valid_ranges[index][i].len && n;
+            i++, n = rb_next(n)) {
+               struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
+
+               if (entry->start != valid_ranges[index][i].start) {
+                       test_err("mapping has start %llu expected %llu",
+                                entry->start, valid_ranges[index][i].start);
+                       return -EINVAL;
+               }
+
+               if (entry->len != valid_ranges[index][i].len) {
+                       test_err("mapping has len %llu expected %llu",
+                                entry->len, valid_ranges[index][i].len);
+                       return -EINVAL;
+               }
+       }
+
+       /*
+        * We exited because we don't have any more entries in the extent_map
+        * but we still expect more valid entries.
+        */
+       if (valid_ranges[index][i].len) {
+               test_err("missing an entry");
+               return -EINVAL;
+       }
+
+       /* We exited the loop but still have entries in the extent map. */
+       if (n) {
+               test_err("we have a left over entry in the extent map we didn't expect");
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Test scenario:
+ *
+ * Test the various edge cases of btrfs_drop_extent_map_range by creating
+ * the following ranges:
+ *
+ * [0, 12k)[12k, 24k)[24k, 36k)[36k, 40k)[40k,64k)
+ *
+ * And then we'll drop:
+ *
+ * [8k, 12k) - test the single front split
+ * [12k, 20k) - test the single back split
+ * [28k, 32k) - test the double split
+ * [32k, 64k) - test whole em dropping
+ *
+ * They'll have the EXTENT_FLAG_COMPRESSED flag set to keep the em tree from
+ * merging the ems.
+ */
+static int test_case_5(void)
+{
+       struct extent_map_tree *em_tree;
+       struct inode *inode;
+       u64 start, end;
+       int ret;
+
+       test_msg("Running btrfs_drop_extent_map_range tests");
+
+       inode = btrfs_new_test_inode();
+       if (!inode) {
+               test_std_err(TEST_ALLOC_INODE);
+               return -ENOMEM;
+       }
+
+       em_tree = &BTRFS_I(inode)->extent_tree;
+
+       /* [0, 12k) */
+       ret = add_compressed_extent(em_tree, 0, SZ_4K * 3, 0);
+       if (ret) {
+               test_err("cannot add extent range [0, 12K)");
+               goto out;
+       }
+
+       /* [12k, 24k) */
+       ret = add_compressed_extent(em_tree, SZ_4K * 3, SZ_4K * 3, SZ_4K);
+       if (ret) {
+               test_err("cannot add extent range [12k, 24k)");
+               goto out;
+       }
+
+       /* [24k, 36k) */
+       ret = add_compressed_extent(em_tree, SZ_4K * 6, SZ_4K * 3, SZ_8K);
+       if (ret) {
+               test_err("cannot add extent range [24k, 36k)");
+               goto out;
+       }
+
+       /* [36k, 40k) */
+       ret = add_compressed_extent(em_tree, SZ_32K + SZ_4K, SZ_4K, SZ_4K * 3);
+       if (ret) {
+               test_err("cannot add extent range [36k, 40k)");
+               goto out;
+       }
+
+       /* [40k, 64k) */
+       ret = add_compressed_extent(em_tree, SZ_4K * 10, SZ_4K * 6, SZ_16K);
+       if (ret) {
+               test_err("cannot add extent range [40k, 64k)");
+               goto out;
+       }
+
+       /* Drop [8k, 12k) */
+       start = SZ_8K;
+       end = (3 * SZ_4K) - 1;
+       btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+       ret = validate_range(&BTRFS_I(inode)->extent_tree, 0);
+       if (ret)
+               goto out;
+
+       /* Drop [12k, 20k) */
+       start = SZ_4K * 3;
+       end = SZ_16K + SZ_4K - 1;
+       btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+       ret = validate_range(&BTRFS_I(inode)->extent_tree, 1);
+       if (ret)
+               goto out;
+
+       /* Drop [28k, 32k) */
+       start = SZ_32K - SZ_4K;
+       end = SZ_32K - 1;
+       btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+       ret = validate_range(&BTRFS_I(inode)->extent_tree, 2);
+       if (ret)
+               goto out;
+
+       /* Drop [32k, 64k) */
+       start = SZ_32K;
+       end = SZ_64K - 1;
+       btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+       ret = validate_range(&BTRFS_I(inode)->extent_tree, 3);
+       if (ret)
+               goto out;
+out:
+       iput(inode);
+       return ret;
+}
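The four drops walk through every shape a range removal can leave behind: a
front split (the front of the em survives), a back split (the back survives),
a double split (a hole punched in the middle leaves two pieces), and whole-em
removal. The interval arithmetic being validated, modeled standalone (just the
math, not the btrfs rbtree code):

#include <stdio.h>

struct range { unsigned long start, len; };

/* Drop [dstart, dend) from em; write the surviving pieces to out. */
static int drop_range(struct range em, unsigned long dstart,
		      unsigned long dend, struct range out[2])
{
	unsigned long end = em.start + em.len;
	int n = 0;

	if (dstart > em.start)	/* front of the em survives */
		out[n++] = (struct range){ em.start,
				(dstart < end ? dstart : end) - em.start };
	if (dend < end) {	/* back of the em survives */
		unsigned long s = dend > em.start ? dend : em.start;

		out[n++] = (struct range){ s, end - s };
	}
	return n;		/* 0, 1, or 2 pieces */
}

int main(void)
{
	struct range out[2];
	/* The double-split case: drop [28K, 32K) from [24K, 36K). */
	int n = drop_range((struct range){ 24 << 10, 12 << 10 },
			   28 << 10, 32 << 10, out);

	for (int i = 0; i < n; i++)	/* expect [24K, 28K) and [32K, 36K) */
		printf("[%luK, %luK)\n", out[i].start >> 10,
		       (out[i].start + out[i].len) >> 10);
	return 0;
}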
+
+/*
+ * Test the btrfs_add_extent_mapping helper which will attempt to create an em
+ * for areas between two existing ems.  Validate it doesn't do this when there
+ * are two unmerged ems side by side.
+ */
+static int test_case_6(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree)
+{
+       struct extent_map *em = NULL;
+       int ret;
+
+       ret = add_compressed_extent(em_tree, 0, SZ_4K, 0);
+       if (ret)
+               goto out;
+
+       ret = add_compressed_extent(em_tree, SZ_4K, SZ_4K, 0);
+       if (ret)
+               goto out;
+
+       em = alloc_extent_map();
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               return -ENOMEM;
+       }
+
+       em->start = SZ_4K;
+       em->len = SZ_4K;
+       em->block_start = SZ_16K;
+       em->block_len = SZ_16K;
+       write_lock(&em_tree->lock);
+       ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, 0, SZ_8K);
+       write_unlock(&em_tree->lock);
+
+       if (ret != 0) {
+               test_err("got an error when adding our em: %d", ret);
+               goto out;
+       }
+
+       ret = -EINVAL;
+       if (em->start != 0) {
+               test_err("unexpected em->start at %llu, wanted 0", em->start);
+               goto out;
+       }
+       if (em->len != SZ_4K) {
+               test_err("unexpected em->len %llu, expected 4K", em->len);
+               goto out;
+       }
+       ret = 0;
+out:
+       free_extent_map(em);
+       free_extent_map_tree(em_tree);
+       return ret;
+}
+
+/*
+ * Regression test for btrfs_drop_extent_map_range.  Calling with skip_pinned ==
+ * true would mess up the start/end calculations and subsequent splits would be
+ * incorrect.
+ */
+static int test_case_7(void)
+{
+       struct extent_map_tree *em_tree;
+       struct extent_map *em;
+       struct inode *inode;
+       int ret;
+
+       test_msg("Running btrfs_drop_extent_cache with pinned");
+
+       inode = btrfs_new_test_inode();
+       if (!inode) {
+               test_std_err(TEST_ALLOC_INODE);
+               return -ENOMEM;
+       }
+
+       em_tree = &BTRFS_I(inode)->extent_tree;
+
+       em = alloc_extent_map();
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* [0, 16K), pinned */
+       em->start = 0;
+       em->len = SZ_16K;
+       em->block_start = 0;
+       em->block_len = SZ_4K;
+       set_bit(EXTENT_FLAG_PINNED, &em->flags);
+       write_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em, 0);
+       write_unlock(&em_tree->lock);
+       if (ret < 0) {
+               test_err("couldn't add extent map");
+               goto out;
+       }
+       free_extent_map(em);
+
+       em = alloc_extent_map();
+       if (!em) {
+               test_std_err(TEST_ALLOC_EXTENT_MAP);
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* [32K, 48K), not pinned */
+       em->start = SZ_32K;
+       em->len = SZ_16K;
+       em->block_start = SZ_32K;
+       em->block_len = SZ_16K;
+       write_lock(&em_tree->lock);
+       ret = add_extent_mapping(em_tree, em, 0);
+       write_unlock(&em_tree->lock);
+       if (ret < 0) {
+               test_err("couldn't add extent map");
+               goto out;
+       }
+       free_extent_map(em);
+
+       /*
+        * Drop [0, 36K). This should skip the pinned [0, 16K) extent and
+        * then split the [32K, 48K) extent.
+        */
+       btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (36 * SZ_1K) - 1, true);
+
+       /* Make sure our extent maps look sane. */
+       ret = -EINVAL;
+
+       em = lookup_extent_mapping(em_tree, 0, SZ_16K);
+       if (!em) {
+               test_err("didn't find an em at 0 as expected");
+               goto out;
+       }
+
+       if (em->start != 0) {
+               test_err("em->start is %llu, expected 0", em->start);
+               goto out;
+       }
+
+       if (em->len != SZ_16K) {
+               test_err("em->len is %llu, expected 16K", em->len);
+               goto out;
+       }
+
+       free_extent_map(em);
+
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
+       read_unlock(&em_tree->lock);
+       if (em) {
+               test_err("found an em when we weren't expecting one");
+               goto out;
+       }
+
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
+       read_unlock(&em_tree->lock);
+       if (!em) {
+               test_err("didn't find an em at 32K as expected");
+               goto out;
+       }
+
+       if (em->start != (36 * SZ_1K)) {
+               test_err("em->start is %llu, expected 36K", em->start);
+               goto out;
+       }
+
+       if (em->len != (12 * SZ_1K)) {
+               test_err("em->len is %llu, expected 12K", em->len);
+               goto out;
+       }
+
+       free_extent_map(em);
+
+       read_lock(&em_tree->lock);
+       em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
+       read_unlock(&em_tree->lock);
+       if (em) {
+               test_err("found an unexpected em above 48K");
+               goto out;
+       }
+
+       ret = 0;
+out:
+       free_extent_map(em);
+       iput(inode);
+       return ret;
+}
+
 struct rmap_test_vector {
        u64 raid_type;
        u64 physical_start;
@@ -619,6 +1020,17 @@ int btrfs_test_extent_map(void)
        if (ret)
                goto out;
        ret = test_case_4(fs_info, em_tree);
+       if (ret)
+               goto out;
+       ret = test_case_5();
+       if (ret)
+               goto out;
+       ret = test_case_6(fs_info, em_tree);
+       if (ret)
+               goto out;
+       ret = test_case_7();
+       if (ret)
+               goto out;
 
        test_msg("running rmap tests");
        for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
index cf30635..874e439 100644 (file)
@@ -292,10 +292,11 @@ loop:
        spin_unlock(&fs_info->trans_lock);
 
        /*
-        * If we are ATTACH, we just want to catch the current transaction,
-        * and commit it. If there is no transaction, just return ENOENT.
+        * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
+        * current transaction, and commit it. If there is no transaction, just
+        * return ENOENT.
         */
-       if (type == TRANS_ATTACH)
+       if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
                return -ENOENT;
 
        /*
@@ -591,8 +592,13 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
                u64 delayed_refs_bytes = 0;
 
                qgroup_reserved = num_items * fs_info->nodesize;
-               ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
-                               enforce_qgroups);
+               /*
+                * Use prealloc for now, as there might be a currently running
+                * transaction that could free this reserved space prematurely
+                * by committing.
+                */
+               ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved,
+                                                        enforce_qgroups, false);
                if (ret)
                        return ERR_PTR(ret);
 
@@ -705,6 +711,14 @@ again:
                h->reloc_reserved = reloc_reserved;
        }
 
+       /*
+        * Now that we have found a transaction to be a part of, convert the
+        * qgroup reservation from prealloc to pertrans. A different transaction
+        * can't race in and free our pertrans out from under us.
+        */
+       if (qgroup_reserved)
+               btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+
 got_it:
        if (!current->journal_info)
                current->journal_info = h;
@@ -752,7 +766,7 @@ alloc_fail:
                btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
                                        num_bytes, NULL);
 reserve_fail:
-       btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
+       btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
        return ERR_PTR(ret);
 }
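The fix is a two-phase reservation: PERTRANS bytes belong to the running
transaction and can be freed by its commit, so the handle first reserves as
PREALLOC (which no commit can touch) and only converts to PERTRANS once it is
attached to a transaction, at which point a racing commit can no longer free
the space out from under it. The pattern as a toy state machine (illustrative
only, not the qgroup code):

#include <stdio.h>

enum rsv_type { RSV_NONE, RSV_PREALLOC, RSV_PERTRANS };

struct reservation {
	enum rsv_type type;
	unsigned long bytes;
};

/* Phase 1: taken before joining a transaction; immune to commits. */
static void reserve_prealloc(struct reservation *r, unsigned long bytes)
{
	r->type = RSV_PREALLOC;
	r->bytes = bytes;
}

/* Phase 2: retag once the handle holds a transaction; from here on
 * only this transaction's own lifecycle releases the bytes. */
static void convert_to_pertrans(struct reservation *r)
{
	if (r->type == RSV_PREALLOC)
		r->type = RSV_PERTRANS;
}

int main(void)
{
	struct reservation r = { RSV_NONE, 0 };

	reserve_prealloc(&r, 16384);	/* before the join */
	convert_to_pertrans(&r);	/* after the join */
	printf("%lu bytes held as %s\n", r.bytes,
	       r.type == RSV_PERTRANS ? "pertrans" : "prealloc");
	return 0;
}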
 
@@ -785,7 +799,10 @@ struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *
 
 /*
  * Similar to regular join but it never starts a transaction when none is
- * running or after waiting for the current one to finish.
+ * running or when there's a running one at a state >= TRANS_STATE_UNBLOCKED.
+ * This is similar to btrfs_attach_transaction() but it allows the join to
+ * happen if the transaction commit already started but it's not yet in the
+ * "doing" phase (the state is < TRANS_STATE_COMMIT_DOING).
  */
 struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
 {
@@ -826,8 +843,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
 
        trans = start_transaction(root, 0, TRANS_ATTACH,
                                  BTRFS_RESERVE_NO_FLUSH, true);
-       if (trans == ERR_PTR(-ENOENT))
-               btrfs_wait_for_commit(root->fs_info, 0);
+       if (trans == ERR_PTR(-ENOENT)) {
+               int ret;
+
+               ret = btrfs_wait_for_commit(root->fs_info, 0);
+               if (ret)
+                       return ERR_PTR(ret);
+       }
 
        return trans;
 }
@@ -931,6 +953,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
        }
 
        wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
+       ret = cur_trans->aborted;
        btrfs_put_transaction(cur_trans);
 out:
        return ret;
@@ -1054,8 +1077,8 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
        u64 start = 0;
        u64 end;
 
-       while (!find_first_extent_bit(dirty_pages, start, &start, &end,
-                                     mark, &cached_state)) {
+       while (find_first_extent_bit(dirty_pages, start, &start, &end,
+                                    mark, &cached_state)) {
                bool wait_writeback = false;
 
                err = convert_extent_bit(dirty_pages, start, end,
@@ -1108,8 +1131,8 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
        u64 start = 0;
        u64 end;
 
-       while (!find_first_extent_bit(dirty_pages, start, &start, &end,
-                                     EXTENT_NEED_WAIT, &cached_state)) {
+       while (find_first_extent_bit(dirty_pages, start, &start, &end,
+                                    EXTENT_NEED_WAIT, &cached_state)) {
                /*
                 * Ignore -ENOMEM errors returned by clear_extent_bit().
                 * When committing the transaction, we'll remove any entries
@@ -1831,8 +1854,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
        btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
                                                  fname.disk_name.len * 2);
-       parent_inode->i_mtime = current_time(parent_inode);
-       parent_inode->i_ctime = parent_inode->i_mtime;
+       parent_inode->i_mtime = inode_set_ctime_current(parent_inode);
        ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
        if (ret) {
                btrfs_abort_transaction(trans, ret);
index 038dfa8..ab08a0b 100644 (file)
@@ -446,6 +446,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
        btrfs_item_key_to_cpu(leaf, &item_key, slot);
        is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
 
+       /*
+        * Bad rootid for reloc trees.
+        *
+        * Reloc trees are only for subvolume trees, other trees only need
+        * to be COWed to be relocated.
+        */
+       if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
+                    !is_fstree(key->offset))) {
+               generic_err(leaf, slot,
+               "invalid reloc tree for root %lld, root id is not a subvolume tree",
+                           key->offset);
+               return -EUCLEAN;
+       }
+
        /* No such tree id */
        if (unlikely(key->objectid == 0)) {
                if (is_root_item)
index 365a1cc..d1e46b8 100644 (file)
@@ -4148,9 +4148,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
                                      inode->i_mtime.tv_nsec);
 
        btrfs_set_token_timespec_sec(&token, &item->ctime,
-                                    inode->i_ctime.tv_sec);
+                                    inode_get_ctime(inode).tv_sec);
        btrfs_set_token_timespec_nsec(&token, &item->ctime,
-                                     inode->i_ctime.tv_nsec);
+                                     inode_get_ctime(inode).tv_nsec);
 
        /*
         * We do not need to set the nbytes field, in fact during a fast fsync
@@ -4841,13 +4841,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
        struct btrfs_ordered_extent *ordered;
        struct btrfs_ordered_extent *tmp;
        struct extent_map *em, *n;
-       struct list_head extents;
+       LIST_HEAD(extents);
        struct extent_map_tree *tree = &inode->extent_tree;
        int ret = 0;
        int num = 0;
 
-       INIT_LIST_HEAD(&extents);
-
        write_lock(&tree->lock);
 
        list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
@@ -6794,8 +6792,8 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
 
        while (true) {
                struct btrfs_fs_info *fs_info = root->fs_info;
-               struct extent_buffer *leaf = path->nodes[0];
-               int slot = path->slots[0];
+               struct extent_buffer *leaf;
+               int slot;
                struct btrfs_key search_key;
                struct inode *inode;
                u64 ino;
index 73f9ea7..9621455 100644 (file)
@@ -681,6 +681,14 @@ error_free_page:
        return -EINVAL;
 }
 
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
+{
+       bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
+                                 BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+       return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
+}
+
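btrfs_sb_fsid_ptr() captures the selection that device_list_add() used to
open-code: with the METADATA_UUID incompat bit set, the metadata identity
lives in its own superblock field, otherwise it is the fsid itself. The same
selection in a standalone model (the flag value is a stand-in, not the real
incompat bit):

#include <stdio.h>
#include <stdint.h>

#define INCOMPAT_METADATA_UUID (1ULL << 0)	/* stand-in flag bit */

struct sb_model {
	uint64_t incompat_flags;
	uint8_t fsid[16];
	uint8_t metadata_uuid[16];
};

/* Mirrors the helper: which 16-byte field identifies the metadata? */
static const uint8_t *sb_fsid_ptr(const struct sb_model *sb)
{
	return (sb->incompat_flags & INCOMPAT_METADATA_UUID) ?
		sb->metadata_uuid : sb->fsid;
}

int main(void)
{
	struct sb_model sb = {
		.incompat_flags = INCOMPAT_METADATA_UUID,
		.fsid = { 0xaa }, .metadata_uuid = { 0xbb },
	};

	printf("metadata uuid starts with 0x%02x\n", sb_fsid_ptr(&sb)[0]);
	return 0;
}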
 /*
  * Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
  * being created with a disk that has already completed its fsid change. Such
@@ -833,15 +841,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
                    found_transid > fs_devices->latest_generation) {
                        memcpy(fs_devices->fsid, disk_super->fsid,
                                        BTRFS_FSID_SIZE);
-
-                       if (has_metadata_uuid)
-                               memcpy(fs_devices->metadata_uuid,
-                                      disk_super->metadata_uuid,
-                                      BTRFS_FSID_SIZE);
-                       else
-                               memcpy(fs_devices->metadata_uuid,
-                                      disk_super->fsid, BTRFS_FSID_SIZE);
-
+                       memcpy(fs_devices->metadata_uuid,
+                              btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE);
                        fs_devices->fsid_change = false;
                }
        }
@@ -851,8 +852,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 
                if (fs_devices->opened) {
                        btrfs_err(NULL,
-               "device %s belongs to fsid %pU, and the fs is already mounted",
-                                 path, fs_devices->fsid);
+"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
+                                 path, fs_devices->fsid, current->comm,
+                                 task_pid_nr(current));
                        mutex_unlock(&fs_devices->device_list_mutex);
                        return ERR_PTR(-EBUSY);
                }
@@ -1424,9 +1426,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
 
        lockdep_assert_held(&device->fs_info->chunk_mutex);
 
-       if (!find_first_extent_bit(&device->alloc_state, *start,
-                                  &physical_start, &physical_end,
-                                  CHUNK_ALLOCATED, NULL)) {
+       if (find_first_extent_bit(&device->alloc_state, *start,
+                                 &physical_start, &physical_end,
+                                 CHUNK_ALLOCATED, NULL)) {
 
                if (in_range(physical_start, *start, len) ||
                    in_range(*start, physical_start,
@@ -1438,18 +1440,18 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
        return false;
 }
 
-static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
+static u64 dev_extent_search_start(struct btrfs_device *device)
 {
        switch (device->fs_devices->chunk_alloc_policy) {
        case BTRFS_CHUNK_ALLOC_REGULAR:
-               return max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
+               return BTRFS_DEVICE_RANGE_RESERVED;
        case BTRFS_CHUNK_ALLOC_ZONED:
                /*
                 * We don't care about the starting region like the regular
                 * allocator does, because we reserve the first two zones for
                 * superblock logging anyway.
                 */
-               return ALIGN(start, device->zone_info->zone_size);
+               return 0;
        default:
                BUG();
        }
@@ -1581,15 +1583,15 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
  * correct usable device space, as device extent freed in current transaction
  * is not reported as available.
  */
-static int find_free_dev_extent_start(struct btrfs_device *device,
-                               u64 num_bytes, u64 search_start, u64 *start,
-                               u64 *len)
+static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+                               u64 *start, u64 *len)
 {
        struct btrfs_fs_info *fs_info = device->fs_info;
        struct btrfs_root *root = fs_info->dev_root;
        struct btrfs_key key;
        struct btrfs_dev_extent *dev_extent;
        struct btrfs_path *path;
+       u64 search_start;
        u64 hole_size;
        u64 max_hole_start;
        u64 max_hole_size;
@@ -1599,7 +1601,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
        int slot;
        struct extent_buffer *l;
 
-       search_start = dev_extent_search_start(device, search_start);
+       search_start = dev_extent_search_start(device);
 
        WARN_ON(device->zone_info &&
                !IS_ALIGNED(num_bytes, device->zone_info->zone_size));
@@ -1725,13 +1727,6 @@ out:
        return ret;
 }
 
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
-                        u64 *start, u64 *len)
-{
-       /* FIXME use last free of some kind */
-       return find_free_dev_extent_start(device, num_bytes, 0, start, len);
-}
-
 static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
                          struct btrfs_device *device,
                          u64 start, u64 *dev_extent_len)
@@ -1917,15 +1912,13 @@ out:
 static void update_dev_time(const char *device_path)
 {
        struct path path;
-       struct timespec64 now;
        int ret;
 
        ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
        if (ret)
                return;
 
-       now = current_time(d_inode(path.dentry));
-       inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME | S_VERSION);
+       inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION);
        path_put(&path);
 }
 
@@ -4078,14 +4071,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
        return has_single_bit_set(flags);
 }
 
-static inline int balance_need_close(struct btrfs_fs_info *fs_info)
-{
-       /* cancel requested || normal exit path */
-       return atomic_read(&fs_info->balance_cancel_req) ||
-               (atomic_read(&fs_info->balance_pause_req) == 0 &&
-                atomic_read(&fs_info->balance_cancel_req) == 0);
-}
-
 /*
  * Validate target profile against allowed profiles and return true if it's OK.
  * Otherwise print the error message and return false.
@@ -4275,6 +4260,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
        u64 num_devices;
        unsigned seq;
        bool reducing_redundancy;
+       bool paused = false;
        int i;
 
        if (btrfs_fs_closing(fs_info) ||
@@ -4405,6 +4391,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
        if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
                btrfs_info(fs_info, "balance: paused");
                btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
+               paused = true;
        }
        /*
         * Balance can be canceled by:
@@ -4433,8 +4420,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
                btrfs_update_ioctl_balance_args(fs_info, bargs);
        }
 
-       if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
-           balance_need_close(fs_info)) {
+       /* We didn't pause, we can clean everything up. */
+       if (!paused) {
                reset_balance_state(fs_info);
                btrfs_exclop_finish(fs_info);
        }
@@ -4644,8 +4631,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
                }
        }
 
-       BUG_ON(fs_info->balance_ctl ||
-               test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+       ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
        atomic_dec(&fs_info->balance_cancel_req);
        mutex_unlock(&fs_info->balance_mutex);
        return 0;
@@ -6226,6 +6212,45 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
                        stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
 }
 
+/*
+ * Map one logical range to one or more physical ranges.
+ *
+ * @length:            (Mandatory) mapped length of this run.
+ *                     One logical range can be split into different segments
+ *                     due to factors like zones and RAID0/5/6/10 stripe
+ *                     boundaries.
+ *
+ * @bioc_ret:          (Mandatory) returned btrfs_io_context structure,
+ *                     which has one or more physical ranges (btrfs_io_stripe)
+ *                     recorded inside.
+ *                     Caller should call btrfs_put_bioc() to free it after use.
+ *
+ * @smap:              (Optional) single physical range optimization.
+ *                     If the map request can be fulfilled by a single
+ *                     physical range, and this parameter is not NULL,
+ *                     then @bioc_ret will be NULL and @smap will be
+ *                     updated.
+ *
+ * @mirror_num_ret:    (Mandatory) returned mirror number if the original
+ *                     value is 0.
+ *
+ *                     Mirror number 0 means to choose any live mirrors.
+ *
+ *                     For non-RAID56 profiles, non-zero mirror_num means
+ *                     the Nth mirror. (e.g. mirror_num 1 means the first
+ *                     copy).
+ *
+ *                     For RAID56 profile, mirror 1 means rebuild from P and
+ *                     the remaining data stripes.
+ *
+ *                     For RAID6 profile, mirror > 2 means marking another
+ *                     data/P stripe as bad and rebuilding from the
+ *                     remaining stripes.
+ *
+ * @need_raid_map:     (Used only by the integrity checker) whether the map
+ *                     wants a full stripe map (including all data and P/Q
+ *                     stripes) for RAID56. Should always be 1 except for the
+ *                     integrity checker.
+ */
 int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
                    u64 logical, u64 *length,
                    struct btrfs_io_context **bioc_ret,
@@ -6400,11 +6425,10 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
         * I/O context structure.
         */
        if (smap && num_alloc_stripes == 1 &&
-           !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
-           (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
-            !dev_replace->tgtdev)) {
+           !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
                set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
-               *mirror_num_ret = mirror_num;
+               if (mirror_num_ret)
+                       *mirror_num_ret = mirror_num;
                *bioc_ret = NULL;
                ret = 0;
                goto out;
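For the striped profiles, the "one logical range, one or more physical ranges"
mapping in the comment above reduces to stripe arithmetic: the logical offset
picks a stripe number, the stripe number round-robins across devices, and the
mapped length is clipped at the next stripe boundary. A RAID0-flavoured sketch
of that math (illustration only, not btrfs_map_block() itself):

#include <stdio.h>
#include <stdint.h>

static void map_raid0(uint64_t logical, uint64_t stripe_len, int num_stripes,
		      int *dev, uint64_t *physical, uint64_t *max_len)
{
	uint64_t stripe_nr = logical / stripe_len;
	uint64_t stripe_offset = logical - stripe_nr * stripe_len;

	*dev = stripe_nr % num_stripes;		/* round-robin device */
	*physical = (stripe_nr / num_stripes) * stripe_len + stripe_offset;
	*max_len = stripe_len - stripe_offset;	/* clip at stripe boundary */
}

int main(void)
{
	int dev;
	uint64_t phys, len;

	/* 192 KiB into a 2-device array with 64 KiB stripes. */
	map_raid0(192 * 1024, 64 * 1024, 2, &dev, &phys, &len);
	printf("dev %d, physical %llu KiB, up to %llu KiB in one run\n",
	       dev, (unsigned long long)(phys >> 10),
	       (unsigned long long)(len >> 10));
	return 0;
}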
index b8c51f1..2128a03 100644 (file)
@@ -650,8 +650,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
 int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
 int btrfs_uuid_scan_kthread(void *data);
 bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
-                        u64 *start, u64 *max_avail);
 void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
 int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
                        struct btrfs_ioctl_get_dev_stats *stats);
@@ -749,5 +747,6 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
 bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
 
 bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
 
 #endif
index fc4b20c..96828a1 100644 (file)
@@ -264,7 +264,7 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
                goto out;
 
        inode_inc_iversion(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        if (ret)
                btrfs_abort_transaction(trans, ret);
@@ -407,7 +407,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
        ret = btrfs_set_prop(trans, inode, name, value, size, flags);
        if (!ret) {
                inode_inc_iversion(inode);
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
                if (ret)
                        btrfs_abort_transaction(trans, ret);
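These conversions are mechanical, but the new accessor's shape is what makes
them terse: inode_set_ctime_current() stores the new ctime and returns the
timespec64 it stored, which is how the transaction.c hunk earlier in this
series can write parent_inode->i_mtime = inode_set_ctime_current(parent_inode)
in one step. The setter-returns-value pattern in miniature (userspace model,
not the VFS API):

#include <stdio.h>
#include <time.h>

struct toy_inode {
	struct timespec mtime;
	struct timespec ctime;
};

/* Like inode_set_ctime_current(): set ctime, hand the value back. */
static struct timespec set_ctime_current(struct toy_inode *inode)
{
	clock_gettime(CLOCK_REALTIME, &inode->ctime);
	return inode->ctime;
}

int main(void)
{
	struct toy_inode inode;

	/* One call updates ctime and feeds mtime, as in the btrfs hunks. */
	inode.mtime = set_ctime_current(&inode);
	printf("ctime == mtime: %s\n",
	       inode.ctime.tv_sec == inode.mtime.tv_sec &&
	       inode.ctime.tv_nsec == inode.mtime.tv_nsec ? "yes" : "no");
	return 0;
}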
index 85b8b33..09bc325 100644 (file)
@@ -65,6 +65,9 @@
 
 #define SUPER_INFO_SECTORS     ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
 
+static void wait_eb_writebacks(struct btrfs_block_group *block_group);
+static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written);
+
 static inline bool sb_zone_is_full(const struct blk_zone *zone)
 {
        return (zone->cond == BLK_ZONE_COND_FULL) ||
@@ -465,8 +468,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
         * use the cache.
         */
        if (populate_cache && bdev_is_zoned(device->bdev)) {
-               zone_info->zone_cache = vzalloc(sizeof(struct blk_zone) *
-                                               zone_info->nr_zones);
+               zone_info->zone_cache = vcalloc(zone_info->nr_zones,
+                                               sizeof(struct blk_zone));
                if (!zone_info->zone_cache) {
                        btrfs_err_in_rcu(device->fs_info,
                                "zoned: failed to allocate zone cache for %s",
@@ -805,6 +808,9 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
                return -EINVAL;
        }
 
+       btrfs_clear_and_info(info, DISCARD_ASYNC,
+                       "zoned: async discard ignored and disabled for zoned mode");
+
        return 0;
 }
 
@@ -1580,19 +1586,9 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
                return;
 
        WARN_ON(cache->bytes_super != 0);
-
-       /* Check for block groups never get activated */
-       if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
-           cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
-           !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
-           cache->alloc_offset == 0) {
-               unusable = cache->length;
-               free = 0;
-       } else {
-               unusable = (cache->alloc_offset - cache->used) +
-                          (cache->length - cache->zone_capacity);
-               free = cache->zone_capacity - cache->alloc_offset;
-       }
+       unusable = (cache->alloc_offset - cache->used) +
+                  (cache->length - cache->zone_capacity);
+       free = cache->zone_capacity - cache->alloc_offset;
 
        /* We only need ->free_space in ALLOC_SEQ block groups */
        cache->cached = BTRFS_CACHE_FINISHED;
@@ -1704,10 +1700,21 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
 {
        struct btrfs_inode *inode = BTRFS_I(ordered->inode);
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
-       struct btrfs_ordered_sum *sum =
-               list_first_entry(&ordered->list, typeof(*sum), list);
-       u64 logical = sum->logical;
-       u64 len = sum->len;
+       struct btrfs_ordered_sum *sum;
+       u64 logical, len;
+
+       /*
+        * Write to pre-allocated region is for the data relocation, and so
+        * it should use WRITE operation. No split/rewrite are necessary.
+        */
+       if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
+               return;
+
+       /* Only the pre-alloc case handled above may leave the list empty. */
+       ASSERT(!list_empty(&ordered->list));
+       sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list);
+       logical = sum->logical;
+       len = sum->len;
 
        while (len < ordered->disk_num_bytes) {
                sum = list_next_entry(sum, list);
@@ -1744,41 +1751,121 @@ out:
        }
 }
 
-bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-                                   struct extent_buffer *eb,
-                                   struct btrfs_block_group **cache_ret)
+static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
+                              struct btrfs_block_group **active_bg)
 {
-       struct btrfs_block_group *cache;
-       bool ret = true;
+       const struct writeback_control *wbc = ctx->wbc;
+       struct btrfs_block_group *block_group = ctx->zoned_bg;
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
 
-       if (!btrfs_is_zoned(fs_info))
+       if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
                return true;
 
-       cache = btrfs_lookup_block_group(fs_info, eb->start);
-       if (!cache)
-               return true;
+       if (fs_info->treelog_bg == block_group->start) {
+               if (!btrfs_zone_activate(block_group)) {
+                       int ret_fin = btrfs_zone_finish_one_bg(fs_info);
 
-       if (cache->meta_write_pointer != eb->start) {
-               btrfs_put_block_group(cache);
-               cache = NULL;
-               ret = false;
-       } else {
-               cache->meta_write_pointer = eb->start + eb->len;
-       }
+                       if (ret_fin != 1 || !btrfs_zone_activate(block_group))
+                               return false;
+               }
+       } else if (*active_bg != block_group) {
+               struct btrfs_block_group *tgt = *active_bg;
 
-       *cache_ret = cache;
+               /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */
+               lockdep_assert_held(&fs_info->zoned_meta_io_lock);
 
-       return ret;
+               if (tgt) {
+                       /*
+                        * If there is an unsent IO left in the allocated area,
+                        * we cannot wait for them as it may cause a deadlock.
+                        */
+                       if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) {
+                               if (wbc->sync_mode == WB_SYNC_NONE ||
+                                   (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync))
+                                       return false;
+                       }
+
+                       /* Pivot active metadata/system block group. */
+                       btrfs_zoned_meta_io_unlock(fs_info);
+                       wait_eb_writebacks(tgt);
+                       do_zone_finish(tgt, true);
+                       btrfs_zoned_meta_io_lock(fs_info);
+                       if (*active_bg == tgt) {
+                               btrfs_put_block_group(tgt);
+                               *active_bg = NULL;
+                       }
+               }
+               if (!btrfs_zone_activate(block_group))
+                       return false;
+               if (*active_bg != block_group) {
+                       ASSERT(*active_bg == NULL);
+                       *active_bg = block_group;
+                       btrfs_get_block_group(block_group);
+               }
+       }
+
+       return true;
 }
 
-void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
-                                    struct extent_buffer *eb)
+/*
+ * Check if @ctx->eb is aligned to the write pointer.
+ *
+ * Return:
+ *   0:        @ctx->eb is at the write pointer. You can write it.
+ *   -EAGAIN:  There is a hole. The caller should handle the case.
+ *   -EBUSY:   There is a hole, but the caller can just bail out.
+ */
+int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_eb_write_context *ctx)
 {
-       if (!btrfs_is_zoned(eb->fs_info) || !cache)
-               return;
+       const struct writeback_control *wbc = ctx->wbc;
+       const struct extent_buffer *eb = ctx->eb;
+       struct btrfs_block_group *block_group = ctx->zoned_bg;
+
+       if (!btrfs_is_zoned(fs_info))
+               return 0;
+
+       if (block_group) {
+               if (block_group->start > eb->start ||
+                   block_group->start + block_group->length <= eb->start) {
+                       btrfs_put_block_group(block_group);
+                       block_group = NULL;
+                       ctx->zoned_bg = NULL;
+               }
+       }
+
+       if (!block_group) {
+               block_group = btrfs_lookup_block_group(fs_info, eb->start);
+               if (!block_group)
+                       return 0;
+               ctx->zoned_bg = block_group;
+       }
+
+       if (block_group->meta_write_pointer == eb->start) {
+               struct btrfs_block_group **tgt;
+
+               if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+                       return 0;
+
+               if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+                       tgt = &fs_info->active_system_bg;
+               else
+                       tgt = &fs_info->active_meta_bg;
+               if (check_bg_is_active(ctx, tgt))
+                       return 0;
+       }
+
+       /*
+        * Since we may release fs_info->zoned_meta_io_lock, someone can already
+        * start writing this eb. In that case, we can just bail out.
+        */
+       if (block_group->meta_write_pointer > eb->start)
+               return -EBUSY;
 
-       ASSERT(cache->meta_write_pointer == eb->start + eb->len);
-       cache->meta_write_pointer = eb->start;
+       /* If for_sync, this hole will be filled by a transaction commit. */
+       if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
+               return -EAGAIN;
+       return -EBUSY;
 }
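The bool-returning interface could only say "writable or not"; the tri-state
return lets the writeback path tell a hole it must handle (-EAGAIN) apart from
a hole or race it can simply skip (-EBUSY). A compact userspace model of the
decision, with the wbc checks collapsed into a single "caller can retry" flag
(illustrative; the real caller lives in extent_io.c, outside this diff):

#include <stdio.h>
#include <errno.h>

static int check_write_pointer(unsigned long wp, unsigned long eb_start,
			       int caller_can_retry)
{
	if (wp == eb_start)
		return 0;	/* aligned with the write pointer: write it */
	if (wp > eb_start)
		return -EBUSY;	/* someone already started this eb: bail */
	/* A hole precedes the eb; either the caller fills it or we bail. */
	return caller_can_retry ? -EAGAIN : -EBUSY;
}

int main(void)
{
	printf("%d %d %d\n",
	       check_write_pointer(4096, 4096, 1),	/* 0 */
	       check_write_pointer(8192, 4096, 1),	/* -EBUSY */
	       check_write_pointer(0, 4096, 1));	/* -EAGAIN */
	return 0;
}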
 
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length)
@@ -1876,10 +1963,10 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
 bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 {
        struct btrfs_fs_info *fs_info = block_group->fs_info;
-       struct btrfs_space_info *space_info = block_group->space_info;
        struct map_lookup *map;
        struct btrfs_device *device;
        u64 physical;
+       const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
        bool ret;
        int i;
 
@@ -1888,7 +1975,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
        map = block_group->physical_map;
 
-       spin_lock(&space_info->lock);
        spin_lock(&block_group->lock);
        if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
                ret = true;
@@ -1901,30 +1987,44 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
                goto out_unlock;
        }
 
+       spin_lock(&fs_info->zone_active_bgs_lock);
        for (i = 0; i < map->num_stripes; i++) {
+               struct btrfs_zoned_device_info *zinfo;
+               int reserved = 0;
+
                device = map->stripes[i].dev;
                physical = map->stripes[i].physical;
+               zinfo = device->zone_info;
 
-               if (device->zone_info->max_active_zones == 0)
+               if (zinfo->max_active_zones == 0)
                        continue;
 
+               if (is_data)
+                       reserved = zinfo->reserved_active_zones;
+               /*
+                * For the data block group, leave active zones for one
+                * metadata block group and one system block group.
+                */
+               if (atomic_read(&zinfo->active_zones_left) <= reserved) {
+                       ret = false;
+                       spin_unlock(&fs_info->zone_active_bgs_lock);
+                       goto out_unlock;
+               }
+
                if (!btrfs_dev_set_active_zone(device, physical)) {
                        /* Cannot activate the zone */
                        ret = false;
+                       spin_unlock(&fs_info->zone_active_bgs_lock);
                        goto out_unlock;
                }
+               if (!is_data)
+                       zinfo->reserved_active_zones--;
        }
+       spin_unlock(&fs_info->zone_active_bgs_lock);
 
        /* Successfully activated all the zones */
        set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
-       WARN_ON(block_group->alloc_offset != 0);
-       if (block_group->zone_unusable == block_group->length) {
-               block_group->zone_unusable = block_group->length - block_group->zone_capacity;
-               space_info->bytes_zone_unusable -= block_group->zone_capacity;
-       }
        spin_unlock(&block_group->lock);
-       btrfs_try_granting_tickets(fs_info, space_info);
-       spin_unlock(&space_info->lock);
 
        /* For the active block group list */
        btrfs_get_block_group(block_group);
@@ -1937,7 +2037,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 
 out_unlock:
        spin_unlock(&block_group->lock);
-       spin_unlock(&space_info->lock);
        return ret;
 }
 
@@ -2003,6 +2102,10 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
         * and block_group->meta_write_pointer for metadata.
         */
        if (!fully_written) {
+               if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+                       spin_unlock(&block_group->lock);
+                       return -EAGAIN;
+               }
                spin_unlock(&block_group->lock);
 
                ret = btrfs_inc_block_group_ro(block_group, false);
@@ -2031,7 +2134,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
                        return 0;
                }
 
-               if (block_group->reserved) {
+               if (block_group->reserved ||
+                   test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+                            &block_group->runtime_flags)) {
                        spin_unlock(&block_group->lock);
                        btrfs_dec_block_group_ro(block_group);
                        return -EAGAIN;
@@ -2040,6 +2145,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
 
        clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
        block_group->alloc_offset = block_group->zone_capacity;
+       if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
+               block_group->meta_write_pointer = block_group->start +
+                                                 block_group->zone_capacity;
        block_group->free_space_ctl->free_space = 0;
        btrfs_clear_treelog_bg(block_group);
        btrfs_clear_data_reloc_bg(block_group);
@@ -2049,18 +2157,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
                const u64 physical = map->stripes[i].physical;
+               struct btrfs_zoned_device_info *zinfo = device->zone_info;
 
-               if (device->zone_info->max_active_zones == 0)
+               if (zinfo->max_active_zones == 0)
                        continue;
 
                ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
                                       physical >> SECTOR_SHIFT,
-                                      device->zone_info->zone_size >> SECTOR_SHIFT,
+                                      zinfo->zone_size >> SECTOR_SHIFT,
                                       GFP_NOFS);
 
                if (ret)
                        return ret;
 
+               if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+                       zinfo->reserved_active_zones++;
                btrfs_dev_clear_active_zone(device, physical);
        }
 
@@ -2099,8 +2210,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
 
        /* Check if there is a device with active zones left */
        mutex_lock(&fs_info->chunk_mutex);
+       spin_lock(&fs_info->zone_active_bgs_lock);
        list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
                struct btrfs_zoned_device_info *zinfo = device->zone_info;
+               int reserved = 0;
 
                if (!device->bdev)
                        continue;
@@ -2110,17 +2223,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
                        break;
                }
 
+               if (flags & BTRFS_BLOCK_GROUP_DATA)
+                       reserved = zinfo->reserved_active_zones;
+
                switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
                case 0: /* single */
-                       ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+                       ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
                        break;
                case BTRFS_BLOCK_GROUP_DUP:
-                       ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+                       ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
                        break;
                }
                if (ret)
                        break;
        }
+       spin_unlock(&fs_info->zone_active_bgs_lock);
        mutex_unlock(&fs_info->chunk_mutex);
 
        if (!ret)
@@ -2262,7 +2379,10 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
 
        /* All relocation extents are written. */
        if (block_group->start + block_group->alloc_offset == logical + length) {
-               /* Now, release this block group for further allocations. */
+               /*
+                * Now, release this block group for further allocations and
+                * zone finish.
+                */
                clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
                          &block_group->runtime_flags);
        }
@@ -2286,7 +2406,8 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
 
                spin_lock(&block_group->lock);
                if (block_group->reserved || block_group->alloc_offset == 0 ||
-                   (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
+                   (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
+                   test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
                        spin_unlock(&block_group->lock);
                        continue;
                }
@@ -2362,3 +2483,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
 
        return 0;
 }
+
+/*
+ * Reserve zones for one metadata block group, one tree-log block group, and one
+ * system block group.
+ */
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+       struct btrfs_block_group *block_group;
+       struct btrfs_device *device;
+       /* Reserve zones for normal SINGLE metadata and tree-log block group. */
+       unsigned int metadata_reserve = 2;
+       /* Reserve a zone for SINGLE system block group. */
+       unsigned int system_reserve = 1;
+
+       if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+               return;
+
+       /*
+        * This function is called from the mount context. So, there is no
+        * parallel process touching the bits. No need for read_seqretry().
+        */
+       if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+               metadata_reserve = 4;
+       if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+               system_reserve = 2;
+
+       /* Apply the reservation on all the devices. */
+       mutex_lock(&fs_devices->device_list_mutex);
+       list_for_each_entry(device, &fs_devices->devices, dev_list) {
+               if (!device->bdev)
+                       continue;
+
+               device->zone_info->reserved_active_zones =
+                       metadata_reserve + system_reserve;
+       }
+       mutex_unlock(&fs_devices->device_list_mutex);
+
+       /* Release reservation for currently active block groups. */
+       spin_lock(&fs_info->zone_active_bgs_lock);
+       list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+               struct map_lookup *map = block_group->physical_map;
+
+               if (!(block_group->flags &
+                     (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+                       continue;
+
+               for (int i = 0; i < map->num_stripes; i++)
+                       map->stripes[i].dev->zone_info->reserved_active_zones--;
+       }
+       spin_unlock(&fs_info->zone_active_bgs_lock);
+}
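
The reservation set up above pairs with the btrfs_zone_activate() hunk
earlier: a data block group may only claim an active zone when more than
reserved_active_zones remain on the device. A condensed sketch of that
per-device gate, using the names from the hunks above:

        int reserved = is_data ? zinfo->reserved_active_zones : 0;

        if (atomic_read(&zinfo->active_zones_left) <= reserved)
                return false;   /* keep zones free for metadata/system */
        if (!btrfs_dev_set_active_zone(device, physical))
                return false;   /* device has no active zones left */
        if (!is_data)
                zinfo->reserved_active_zones--; /* meta/system consumes its share */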
index 27322b9..b9cec52 100644 (file)
@@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
        u8  zone_size_shift;
        u32 nr_zones;
        unsigned int max_active_zones;
+       /*
+        * Reserved active zones for one metadata and one system block group.
+        * It can vary per-device depending on the allocation status.
+        */
+       int reserved_active_zones;
        atomic_t active_zones_left;
        unsigned long *seq_zones;
        unsigned long *empty_zones;
@@ -58,11 +63,8 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
                            struct extent_buffer *eb);
 bool btrfs_use_zone_append(struct btrfs_bio *bbio);
 void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
-bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-                                   struct extent_buffer *eb,
-                                   struct btrfs_block_group **cache_ret);
-void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
-                                    struct extent_buffer *eb);
+int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+                                  struct btrfs_eb_write_context *ctx);
 int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
 int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
                                  u64 physical_start, u64 physical_pos);
@@ -81,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
 int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
 int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
                                struct btrfs_space_info *space_info, bool do_finish);
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
 #else /* CONFIG_BLK_DEV_ZONED */
 static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
                                     struct blk_zone *zone)
@@ -189,17 +192,10 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
 {
 }
 
-static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
-                              struct extent_buffer *eb,
-                              struct btrfs_block_group **cache_ret)
-{
-       return true;
-}
-
-static inline void btrfs_revert_meta_write_pointer(
-                                               struct btrfs_block_group *cache,
-                                               struct extent_buffer *eb)
+static inline int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+                                                struct btrfs_eb_write_context *ctx)
 {
+       return 0;
 }
 
 static inline int btrfs_zoned_issue_zeroout(struct btrfs_device *device,
@@ -262,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
        return 0;
 }
 
+static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
+
 #endif
 
 static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
index bd09132..084a6ad 100644 (file)
@@ -49,6 +49,7 @@
 #include <trace/events/block.h>
 #include <linux/fscrypt.h>
 #include <linux/fsverity.h>
+#include <linux/sched/isolation.h>
 
 #include "internal.h"
 
@@ -1225,19 +1226,14 @@ EXPORT_SYMBOL(mark_buffer_dirty);
 
 void mark_buffer_write_io_error(struct buffer_head *bh)
 {
-       struct super_block *sb;
-
        set_buffer_write_io_error(bh);
        /* FIXME: do we need to set this in both places? */
        if (bh->b_folio && bh->b_folio->mapping)
                mapping_set_error(bh->b_folio->mapping, -EIO);
-       if (bh->b_assoc_map)
+       if (bh->b_assoc_map) {
                mapping_set_error(bh->b_assoc_map, -EIO);
-       rcu_read_lock();
-       sb = READ_ONCE(bh->b_bdev->bd_super);
-       if (sb)
-               errseq_set(&sb->s_wb_err, -EIO);
-       rcu_read_unlock();
+               errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
+       }
 }
 EXPORT_SYMBOL(mark_buffer_write_io_error);
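
The rewrite above records the error via the associated mapping's
superblock rather than the removed bh->b_bdev->bd_super pointer. For
reference, a minimal errseq producer/consumer cycle built on the
lib/errseq.c API (the surrounding variables are illustrative):

        errseq_t since = errseq_sample(&sb->s_wb_err);  /* e.g. at syncfs() start */

        errseq_set(&sb->s_wb_err, -EIO);                /* what the hunk does */

        /* Later: report the error once, then advance past it. */
        int err = errseq_check_and_advance(&sb->s_wb_err, &since);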
 
@@ -1352,7 +1348,7 @@ static void bh_lru_install(struct buffer_head *bh)
         * failing page migration.
         * Skip putting upcoming bh into bh_lru until migration is done.
         */
-       if (lru_cache_disabled()) {
+       if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) {
                bh_lru_unlock();
                return;
        }
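
Both bh_lru hunks in this file gate the per-CPU buffer_head LRU on
cpu_is_isolated(): isolated (nohz_full) CPUs bypass the cache entirely, so
they never hold entries that would later have to be invalidated, which
would disturb isolated workloads. The gate, condensed:

        if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id()))
                return;         /* skip the per-CPU bh cache on this CPU */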
@@ -1382,6 +1378,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
 
        check_irqs_on();
        bh_lru_lock();
+       if (cpu_is_isolated(smp_processor_id())) {
+               bh_lru_unlock();
+               return NULL;
+       }
        for (i = 0; i < BH_LRU_SIZE; i++) {
                struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
 
index 175a25f..009d23c 100644 (file)
@@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
 
        _enter("%ld", ret);
 
-       /* Tell lockdep we inherited freeze protection from submission thread */
-       __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
-       __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+       kiocb_end_write(iocb);
 
        if (ret < 0)
                trace_cachefiles_io_error(object, inode, ret,
@@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object,
 {
        struct cachefiles_cache *cache;
        struct cachefiles_kiocb *ki;
-       struct inode *inode;
        unsigned int old_nofs;
        ssize_t ret;
        size_t len = iov_iter_count(iter);
@@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object,
                ki->iocb.ki_complete = cachefiles_write_complete;
        atomic_long_add(ki->b_writing, &cache->b_writing);
 
-       /* Open-code file_start_write here to grab freeze protection, which
-        * will be released by another thread in aio_complete_rw().  Fool
-        * lockdep by telling it the lock got released so that it doesn't
-        * complain about the held lock when we return to userspace.
-        */
-       inode = file_inode(file);
-       __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
-       __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+       kiocb_start_write(&ki->iocb);
 
        get_file(ki->iocb.ki_filp);
        cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
 
-       trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
+       trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
        old_nofs = memalloc_nofs_save();
        ret = cachefiles_inject_write_error();
        if (ret == 0)
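
kiocb_start_write()/kiocb_end_write() package exactly the open-coded
sequence removed above. A rough shape of the helpers, reconstructed from
that removed code (treat as an approximation of the include/linux/fs.h
definitions):

        static inline void kiocb_start_write(struct kiocb *iocb)
        {
                struct inode *inode = file_inode(iocb->ki_filp);

                sb_start_write(inode->i_sb);
                /* Completion may run in another thread; appease lockdep. */
                __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
        }

        static inline void kiocb_end_write(struct kiocb *iocb)
        {
                struct inode *inode = file_inode(iocb->ki_filp);

                /* Tell lockdep we inherited freeze protection from the submitter. */
                __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
                sb_end_write(inode->i_sb);
        }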
index 6945a93..c91b293 100644 (file)
@@ -93,7 +93,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
        char *value = NULL;
        struct iattr newattrs;
        struct inode *inode = d_inode(dentry);
-       struct timespec64 old_ctime = inode->i_ctime;
+       struct timespec64 old_ctime = inode_get_ctime(inode);
        umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
 
        if (ceph_snap(inode) != CEPH_NOSNAP) {
index e2bb0d0..09cd6d3 100644 (file)
@@ -1400,7 +1400,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
 
        arg->mtime = inode->i_mtime;
        arg->atime = inode->i_atime;
-       arg->ctime = inode->i_ctime;
+       arg->ctime = inode_get_ctime(inode);
        arg->btime = ci->i_btime;
        arg->change_attr = inode_peek_iversion_raw(inode);
 
index 4a2b39d..bdcffb0 100644 (file)
@@ -2019,9 +2019,10 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
        }
 }
 
+WRAP_DIR_ITER(ceph_readdir) // FIXME!
 const struct file_operations ceph_dir_fops = {
        .read = ceph_read_dir,
-       .iterate = ceph_readdir,
+       .iterate_shared = shared_ceph_readdir,
        .llseek = ceph_dir_llseek,
        .open = ceph_open,
        .release = ceph_release,
@@ -2033,7 +2034,7 @@ const struct file_operations ceph_dir_fops = {
 };
 
 const struct file_operations ceph_snapdir_fops = {
-       .iterate = ceph_readdir,
+       .iterate_shared = shared_ceph_readdir,
        .llseek = ceph_dir_llseek,
        .open = ceph_open,
        .release = ceph_release,
index 8e5f41d..fd05d68 100644 (file)
@@ -100,7 +100,7 @@ struct inode *ceph_get_snapdir(struct inode *parent)
        inode->i_uid = parent->i_uid;
        inode->i_gid = parent->i_gid;
        inode->i_mtime = parent->i_mtime;
-       inode->i_ctime = parent->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(parent));
        inode->i_atime = parent->i_atime;
        ci->i_rbytes = 0;
        ci->i_btime = ceph_inode(parent)->i_btime;
@@ -688,6 +688,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
                         struct timespec64 *mtime, struct timespec64 *atime)
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
+       struct timespec64 ictime = inode_get_ctime(inode);
        int warn = 0;
 
        if (issued & (CEPH_CAP_FILE_EXCL|
@@ -696,11 +697,11 @@ void ceph_fill_file_time(struct inode *inode, int issued,
                      CEPH_CAP_AUTH_EXCL|
                      CEPH_CAP_XATTR_EXCL)) {
                if (ci->i_version == 0 ||
-                   timespec64_compare(ctime, &inode->i_ctime) > 0) {
+                   timespec64_compare(ctime, &ictime) > 0) {
                        dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
-                            inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+                            ictime.tv_sec, ictime.tv_nsec,
                             ctime->tv_sec, ctime->tv_nsec);
-                       inode->i_ctime = *ctime;
+                       inode_set_ctime_to_ts(inode, *ctime);
                }
                if (ci->i_version == 0 ||
                    ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
@@ -738,7 +739,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
        } else {
                /* we have no write|excl caps; whatever the MDS says is true */
                if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
-                       inode->i_ctime = *ctime;
+                       inode_set_ctime_to_ts(inode, *ctime);
                        inode->i_mtime = *mtime;
                        inode->i_atime = *atime;
                        ci->i_time_warp_seq = time_warp_seq;
@@ -2166,7 +2167,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
                bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
                                         ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
                dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
-                    inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+                    inode_get_ctime(inode).tv_sec,
+                    inode_get_ctime(inode).tv_nsec,
                     attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
                     only ? "ctime only" : "ignored");
                if (only) {
@@ -2191,7 +2193,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
        if (dirtied) {
                inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
                                                           &prealloc_cf);
-               inode->i_ctime = attr->ia_ctime;
+               inode_set_ctime_to_ts(inode, attr->ia_ctime);
                inode_inc_iversion_raw(inode);
        }
 
@@ -2465,7 +2467,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
                        return err;
        }
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        stat->ino = ceph_present_inode(inode);
 
        /*
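
The ceph hunks here, and the configfs/coda/devpts/ecryptfs hunks below,
all follow the same tree-wide ctime accessor conversion. The pattern (each
setter returns the newly set timespec64, which is what makes the chained
i_mtime assignments in later hunks work; sec/nsec are illustrative):

        struct timespec64 ts = inode_get_ctime(inode);    /* read i_ctime */

        inode_set_ctime_to_ts(inode, ts);                 /* write a timespec64 */
        inode_set_ctime(inode, sec, nsec);                /* write raw fields */
        inode->i_mtime = inode_set_ctime_current(inode);  /* mtime = ctime = now */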
index 66048a8..5fb367b 100644 (file)
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
 
        dout("mdsc delayed_work\n");
 
-       if (mdsc->stopping)
+       if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
                return;
 
        mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
 void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
 {
        dout("pre_umount\n");
-       mdsc->stopping = 1;
+       mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
 
        ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
        ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
index 724307f..86d2965 100644 (file)
@@ -380,6 +380,11 @@ struct cap_wait {
        int                     want;
 };
 
+enum {
+       CEPH_MDSC_STOPPING_BEGIN = 1,
+       CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
 /*
  * mds client state
  */
index cce78d7..6d3584f 100644 (file)
@@ -216,7 +216,7 @@ static void metric_delayed_work(struct work_struct *work)
        struct ceph_mds_client *mdsc =
                container_of(m, struct ceph_mds_client, metric);
 
-       if (mdsc->stopping)
+       if (mdsc->stopping || disable_send_metrics)
                return;
 
        if (!m->session || !check_session_state(m->session)) {
index 343d738..c9920ad 100644 (file)
@@ -660,7 +660,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
        capsnap->size = i_size_read(inode);
        capsnap->mtime = inode->i_mtime;
        capsnap->atime = inode->i_atime;
-       capsnap->ctime = inode->i_ctime;
+       capsnap->ctime = inode_get_ctime(inode);
        capsnap->btime = ci->i_btime;
        capsnap->change_attr = inode_peek_iversion_raw(inode);
        capsnap->time_warp_seq = ci->i_time_warp_seq;
index 3fc48b4..a5f5201 100644 (file)
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
        ceph_mdsc_pre_umount(fsc->mdsc);
        flush_fs_workqueues(fsc);
 
+       /*
+        * Though kill_anon_super() will trigger sync_filesystem()
+        * eventually anyway, do it here first and then bump the
+        * shutdown stage so that the work queue stops as early
+        * as possible.
+        */
+       sync_filesystem(s);
+
+       fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
        kill_anon_super(s);
 
        fsc->client->extra_mon_dispatch = NULL;
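
With the enum introduced above, mdsc->stopping becomes a staged
progression rather than a boolean, and delayed_work() only bails out once
the FLUSHED stage is reached. The shutdown sequence, condensed from these
hunks:

        mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;    /* pre_umount: flush mdlog */
        sync_filesystem(s);                           /* kill_sb: write back everything */
        fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED; /* delayed_work now stops */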
index 8061839..1cbd84c 100644 (file)
@@ -1238,7 +1238,7 @@ retry:
                dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
                                               &prealloc_cf);
                ci->i_xattrs.dirty = true;
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
        }
 
        spin_unlock(&ci->i_ceph_lock);
index 903ca8f..ae02385 100644 (file)
@@ -127,7 +127,8 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
        if (attr->va_mtime.tv_sec != -1)
                inode->i_mtime = coda_to_timespec64(attr->va_mtime);
         if (attr->va_ctime.tv_sec != -1)
-               inode->i_ctime = coda_to_timespec64(attr->va_ctime);
+               inode_set_ctime_to_ts(inode,
+                                     coda_to_timespec64(attr->va_ctime));
 }
 
 
index 8450b1b..cb512b1 100644 (file)
@@ -111,7 +111,7 @@ static inline void coda_dir_update_mtime(struct inode *dir)
        /* optimistically we can also act as if our nose bleeds. The
         * granularity of the mtime is coarse anyways so we might actually be
         * right most of the time. Note: we only do this for directories. */
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 #endif
 }
 
@@ -429,21 +429,14 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
        cfi = coda_ftoc(coda_file);
        host_file = cfi->cfi_container;
 
-       if (host_file->f_op->iterate || host_file->f_op->iterate_shared) {
+       if (host_file->f_op->iterate_shared) {
                struct inode *host_inode = file_inode(host_file);
                ret = -ENOENT;
                if (!IS_DEADDIR(host_inode)) {
-                       if (host_file->f_op->iterate_shared) {
-                               inode_lock_shared(host_inode);
-                               ret = host_file->f_op->iterate_shared(host_file, ctx);
-                               file_accessed(host_file);
-                               inode_unlock_shared(host_inode);
-                       } else {
-                               inode_lock(host_inode);
-                               ret = host_file->f_op->iterate(host_file, ctx);
-                               file_accessed(host_file);
-                               inode_unlock(host_inode);
-                       }
+                       inode_lock_shared(host_inode);
+                       ret = host_file->f_op->iterate_shared(host_file, ctx);
+                       file_accessed(host_file);
+                       inode_unlock_shared(host_inode);
                }
                return ret;
        }
@@ -585,10 +578,11 @@ const struct inode_operations coda_dir_inode_operations = {
        .setattr        = coda_setattr,
 };
 
+WRAP_DIR_ITER(coda_readdir) // FIXME!
 const struct file_operations coda_dir_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
-       .iterate        = coda_readdir,
+       .iterate_shared = shared_coda_readdir,
        .open           = coda_open,
        .release        = coda_release,
        .fsync          = coda_fsync,
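
WRAP_DIR_ITER() generates the shared_ceph_readdir()/shared_coda_readdir()
wrappers used above, so an ->iterate()-style body that is not yet safe
under a shared inode lock can still be exposed through ->iterate_shared();
the wrapper funnels the call through wrap_directory_iterator(), which
re-serializes it. Roughly (treat this expansion as approximate):

        int wrap_directory_iterator(struct file *, struct dir_context *,
                                    int (*)(struct file *, struct dir_context *));

        #define WRAP_DIR_ITER(x) \
                static int shared_##x(struct file *file, struct dir_context *ctx) \
                { return wrap_directory_iterator(file, ctx, x); }

The // FIXME! markers flag these wrappers as stopgaps until the readdir
bodies are made genuinely shared-lock safe.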
index 12b26bd..4234661 100644 (file)
@@ -84,7 +84,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
        ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
        coda_inode->i_size = file_inode(host_file)->i_size;
        coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
-       coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
+       coda_inode->i_mtime = inode_set_ctime_current(coda_inode);
        inode_unlock(coda_inode);
        file_end_write(host_file);
 
index d661e6c..0c7c252 100644 (file)
@@ -256,7 +256,8 @@ int coda_getattr(struct mnt_idmap *idmap, const struct path *path,
 {
        int err = coda_revalidate_inode(d_inode(path->dentry));
        if (!err)
-               generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask,
+                                d_inode(path->dentry), stat);
        return err;
 }
 
@@ -269,7 +270,7 @@ int coda_setattr(struct mnt_idmap *idmap, struct dentry *de,
 
        memset(&vattr, 0, sizeof(vattr)); 
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        coda_iattr_to_vattr(iattr, &vattr);
        vattr.va_type = C_VNON; /* cannot set type */
 
index 1c15edb..fbdcb35 100644 (file)
@@ -88,8 +88,7 @@ int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
 {
        inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime =
-               inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 }
 
 static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
@@ -99,7 +98,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
        inode->i_gid = iattr->ia_gid;
        inode->i_atime = iattr->ia_atime;
        inode->i_mtime = iattr->ia_mtime;
-       inode->i_ctime = iattr->ia_ctime;
+       inode_set_ctime_to_ts(inode, iattr->ia_ctime);
 }
 
 struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
@@ -172,7 +171,7 @@ struct inode *configfs_create(struct dentry *dentry, umode_t mode)
                return ERR_PTR(-ENOMEM);
 
        p_inode = d_inode(dentry->d_parent);
-       p_inode->i_mtime = p_inode->i_ctime = current_time(p_inode);
+       p_inode->i_mtime = inode_set_ctime_current(p_inode);
        configfs_set_inode_lock_class(sd, inode);
        return inode;
 }
index 27c6597..5ee7d7b 100644 (file)
@@ -133,7 +133,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
        }
 
        /* Struct copy intentional */
-       inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+       inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+                                                               zerotime);
        /* inode->i_nlink is left 1 - arguably wrong for directories,
           but it's the best we can do without reading the directory
           contents.  1 yields the right result in GNU find, even
@@ -485,12 +486,16 @@ static void cramfs_kill_sb(struct super_block *sb)
 {
        struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
 
+       generic_shutdown_super(sb);
+
        if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
                if (sbi && sbi->mtd_point_size)
                        mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
-               kill_mtd_super(sb);
+               put_mtd_device(sb->s_mtd);
+               sb->s_mtd = NULL;
        } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
-               kill_block_super(sb);
+               sync_blockdev(sb->s_bdev);
+               blkdev_put(sb->s_bdev, sb);
        }
        kfree(sbi);
 }
index 52e6d5f..25ac74d 100644 (file)
@@ -1664,7 +1664,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
        if (dentry == _data && dentry->d_lockref.count == 1)
                return D_WALK_CONTINUE;
 
-       printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
+       WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} "
                        " still in use (%d) [unmount of %s %s]\n",
                       dentry,
                       dentry->d_inode ?
@@ -1673,7 +1673,6 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
                       dentry->d_lockref.count,
                       dentry->d_sb->s_type->name,
                       dentry->d_sb->s_id);
-       WARN_ON(1);
        return D_WALK_CONTINUE;
 }
 
@@ -3247,8 +3246,6 @@ void d_genocide(struct dentry *parent)
        d_walk(parent, parent, d_genocide_kill);
 }
 
-EXPORT_SYMBOL(d_genocide);
-
 void d_tmpfile(struct file *file, struct inode *inode)
 {
        struct dentry *dentry = file->f_path.dentry;
index 3f81f73..83e57e9 100644 (file)
@@ -72,8 +72,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
        struct inode *inode = new_inode(sb);
        if (inode) {
                inode->i_ino = get_next_ino();
-               inode->i_atime = inode->i_mtime =
-                       inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        }
        return inode;
 }
index fe3db0e..299c295 100644 (file)
@@ -338,7 +338,7 @@ static int mknod_ptmx(struct super_block *sb)
        }
 
        inode->i_ino = 2;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
        mode = S_IFCHR|opts->ptmxmode;
        init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
@@ -451,7 +451,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
        if (!inode)
                goto fail;
        inode->i_ino = 1;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
@@ -534,12 +534,12 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
 
 /**
  * devpts_pty_new -- create a new inode in /dev/pts/
- * @ptmx_inode: inode of the master
- * @device: major+minor of the node to be created
+ * @fsi: Filesystem info for this instance.
  * @index: used as a name of the node
  * @priv: what's given back by devpts_get_priv
  *
- * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
+ * The dentry for the created inode is returned.
+ * Remove it from /dev/pts/ with devpts_pty_kill().
  */
 struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
 {
@@ -560,7 +560,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
        inode->i_ino = index + 3;
        inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
        inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index));
 
        sprintf(s, "%d", index);
@@ -580,7 +580,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
 
 /**
  * devpts_get_priv -- get private data for a slave
- * @pts_inode: inode of the slave
+ * @dentry: dentry of the slave
  *
  * Returns whatever was passed as priv in devpts_pty_new for a given inode.
  */
@@ -593,7 +593,7 @@ void *devpts_get_priv(struct dentry *dentry)
 
 /**
  * devpts_pty_kill -- remove inode form /dev/pts/
- * @inode: inode of the slave to be removed
+ * @dentry: dentry of the slave to be removed
  *
  * This is an inverse operation of devpts_pty_new.
  */
index c16f0d6..03bd550 100644 (file)
@@ -441,10 +441,10 @@ int ecryptfs_encrypt_page(struct page *page)
        }
 
        lower_offset = lower_offset_for_page(crypt_stat, page);
-       enc_extent_virt = kmap(enc_extent_page);
+       enc_extent_virt = kmap_local_page(enc_extent_page);
        rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
                                  PAGE_SIZE);
-       kunmap(enc_extent_page);
+       kunmap_local(enc_extent_virt);
        if (rc < 0) {
                ecryptfs_printk(KERN_ERR,
                        "Error attempting to write lower page; rc = [%d]\n",
@@ -490,10 +490,10 @@ int ecryptfs_decrypt_page(struct page *page)
        BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
 
        lower_offset = lower_offset_for_page(crypt_stat, page);
-       page_virt = kmap(page);
+       page_virt = kmap_local_page(page);
        rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
                                 ecryptfs_inode);
-       kunmap(page);
+       kunmap_local(page_virt);
        if (rc < 0) {
                ecryptfs_printk(KERN_ERR,
                        "Error attempting to read lower page; rc = [%d]\n",
index 8327491..992d9c7 100644 (file)
@@ -148,7 +148,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
        }
        fsstack_copy_attr_times(dir, lower_dir);
        set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
 out_unlock:
        dput(lower_dentry);
        inode_unlock(lower_dir);
@@ -982,7 +982,7 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
 
        mount_crypt_stat = &ecryptfs_superblock_to_private(
                                                dentry->d_sb)->mount_crypt_stat;
-       generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
        if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
                char *target;
                size_t targetsiz;
@@ -1011,7 +1011,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
        if (!rc) {
                fsstack_copy_attr_all(d_inode(dentry),
                                      ecryptfs_inode_to_lower(d_inode(dentry)));
-               generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask,
+                                d_inode(dentry), stat);
                stat->blocks = lower_stat.blocks;
        }
        return rc;
index 373c3e5..e2483ac 100644 (file)
@@ -125,7 +125,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
                        /* This is a header extent */
                        char *page_virt;
 
-                       page_virt = kmap_atomic(page);
+                       page_virt = kmap_local_page(page);
                        memset(page_virt, 0, PAGE_SIZE);
                        /* TODO: Support more than one header extent */
                        if (view_extent_num == 0) {
@@ -138,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
                                                               crypt_stat,
                                                               &written);
                        }
-                       kunmap_atomic(page_virt);
+                       kunmap_local(page_virt);
                        flush_dcache_page(page);
                        if (rc) {
                                printk(KERN_ERR "%s: Error reading xattr "
@@ -255,7 +255,6 @@ out:
  * @mapping: The eCryptfs object
  * @pos: The file offset at which to start writing
  * @len: Length of the write
- * @flags: Various flags
  * @pagep: Pointer to return the page
  * @fsdata: Pointer to return fs data (unused)
  *
index 60bdcad..3458f15 100644 (file)
@@ -64,11 +64,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
 
        offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
                  + offset_in_page);
-       virt = kmap(page_for_lower);
+       virt = kmap_local_page(page_for_lower);
        rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
        if (rc > 0)
                rc = 0;
-       kunmap(page_for_lower);
+       kunmap_local(virt);
        return rc;
 }
 
@@ -140,7 +140,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
                               ecryptfs_page_idx, rc);
                        goto out;
                }
-               ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
+               ecryptfs_page_virt = kmap_local_page(ecryptfs_page);
 
                /*
                 * pos: where we're now writing, offset: where the request was
@@ -163,7 +163,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
                               (data + data_offset), num_bytes);
                        data_offset += num_bytes;
                }
-               kunmap_atomic(ecryptfs_page_virt);
+               kunmap_local(ecryptfs_page_virt);
                flush_dcache_page(ecryptfs_page);
                SetPageUptodate(ecryptfs_page);
                unlock_page(ecryptfs_page);
@@ -253,11 +253,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
        int rc;
 
        offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
-       virt = kmap(page_for_ecryptfs);
+       virt = kmap_local_page(page_for_ecryptfs);
        rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
        if (rc > 0)
                rc = 0;
-       kunmap(page_for_ecryptfs);
+       kunmap_local(virt);
        flush_dcache_page(page_for_ecryptfs);
        return rc;
 }
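
The kmap()/kmap_atomic() conversions in these ecryptfs hunks all reduce to
the same kmap_local_page() pattern. The one rule to keep in mind (and the
reason for the reverse-order unmapping in the erofs DEFLATE code below) is
that local mappings are stack-like and must be undone in reverse order
(src_page/dst_page are illustrative):

        u8 *src = kmap_local_page(src_page);
        u8 *dst = kmap_local_page(dst_page);

        memcpy(dst, src, PAGE_SIZE);

        kunmap_local(dst);      /* last mapped, first unmapped */
        kunmap_local(src);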
index d57ee15..59b5271 100644 (file)
@@ -51,7 +51,7 @@ static ssize_t efivarfs_file_write(struct file *file,
        } else {
                inode_lock(inode);
                i_size_write(inode, datasize + sizeof(attributes));
-               inode->i_mtime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                inode_unlock(inode);
        }
 
index b973a2c..db9231f 100644 (file)
@@ -25,7 +25,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb,
        if (inode) {
                inode->i_ino = get_next_ino();
                inode->i_mode = mode;
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inode->i_flags = is_removable ? 0 : S_IMMUTABLE;
                switch (mode & S_IFMT) {
                case S_IFREG:
index 3ba94bb..3789d22 100644 (file)
@@ -105,8 +105,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
        inode->i_size  = be32_to_cpu(efs_inode->di_size);
        inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime);
        inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime);
-       inode->i_ctime.tv_sec = be32_to_cpu(efs_inode->di_ctime);
-       inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+       inode_set_ctime(inode, be32_to_cpu(efs_inode->di_ctime), 0);
+       inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
 
        /* this is the number of blocks in the file */
        if (inode->i_size == 0) {
index f259d92..f6dc961 100644 (file)
@@ -38,6 +38,7 @@ config EROFS_FS_DEBUG
 config EROFS_FS_XATTR
        bool "EROFS extended attributes"
        depends on EROFS_FS
+       select XXHASH
        default y
        help
          Extended attributes are name:value pairs associated with inodes by
@@ -99,6 +100,21 @@ config EROFS_FS_ZIP_LZMA
 
          If unsure, say N.
 
+config EROFS_FS_ZIP_DEFLATE
+       bool "EROFS DEFLATE compressed data support"
+       depends on EROFS_FS_ZIP
+       select ZLIB_INFLATE
+       help
+         Saying Y here includes support for reading EROFS file systems
+         containing DEFLATE compressed data.  It gives better compression
+         ratios than the default LZ4 format, while it costs more CPU
+         overhead.
+
+         DEFLATE support is an experimental feature for now, so most
+         file systems will still be readable without selecting this option.
+
+         If unsure, say N.
+
 config EROFS_FS_ONDEMAND
        bool "EROFS fscache-based on-demand read support"
        depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
index a3a98fc..994d0b9 100644 (file)
@@ -5,4 +5,5 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
 erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
 erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
 erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
+erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
 erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
index b1b8465..349c331 100644 (file)
@@ -94,4 +94,6 @@ extern const struct z_erofs_decompressor erofs_decompressors[];
 /* prototypes for specific algorithms */
 int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
                            struct page **pagepool);
+int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+                              struct page **pagepool);
 #endif
index 2a29943..332ec5f 100644 (file)
@@ -148,7 +148,7 @@ static void *z_erofs_lz4_handle_overlap(struct z_erofs_lz4_decompress_ctx *ctx,
                *maptype = 0;
                return inpage;
        }
-       kunmap_atomic(inpage);
+       kunmap_local(inpage);
        might_sleep();
        src = erofs_vm_map_ram(rq->in, ctx->inpages);
        if (!src)
@@ -162,7 +162,7 @@ docopy:
        src = erofs_get_pcpubuf(ctx->inpages);
        if (!src) {
                DBG_BUGON(1);
-               kunmap_atomic(inpage);
+               kunmap_local(inpage);
                return ERR_PTR(-EFAULT);
        }
 
@@ -173,9 +173,9 @@ docopy:
                        min_t(unsigned int, total, PAGE_SIZE - *inputmargin);
 
                if (!inpage)
-                       inpage = kmap_atomic(*in);
+                       inpage = kmap_local_page(*in);
                memcpy(tmp, inpage + *inputmargin, page_copycnt);
-               kunmap_atomic(inpage);
+               kunmap_local(inpage);
                inpage = NULL;
                tmp += page_copycnt;
                total -= page_copycnt;
@@ -214,7 +214,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
        int ret, maptype;
 
        DBG_BUGON(*rq->in == NULL);
-       headpage = kmap_atomic(*rq->in);
+       headpage = kmap_local_page(*rq->in);
 
        /* LZ4 decompression inplace is only safe if zero_padding is enabled */
        if (erofs_sb_has_zero_padding(EROFS_SB(rq->sb))) {
@@ -223,7 +223,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
                                min_t(unsigned int, rq->inputsize,
                                      rq->sb->s_blocksize - rq->pageofs_in));
                if (ret) {
-                       kunmap_atomic(headpage);
+                       kunmap_local(headpage);
                        return ret;
                }
                may_inplace = !((rq->pageofs_in + rq->inputsize) &
@@ -261,7 +261,7 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
        }
 
        if (maptype == 0) {
-               kunmap_atomic(headpage);
+               kunmap_local(headpage);
        } else if (maptype == 1) {
                vm_unmap_ram(src, ctx->inpages);
        } else if (maptype == 2) {
@@ -289,7 +289,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
        /* one optimized fast path only for non bigpcluster cases yet */
        if (ctx.inpages == 1 && ctx.outpages == 1 && !rq->inplace_io) {
                DBG_BUGON(!*rq->out);
-               dst = kmap_atomic(*rq->out);
+               dst = kmap_local_page(*rq->out);
                dst_maptype = 0;
                goto dstmap_out;
        }
@@ -311,7 +311,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq,
 dstmap_out:
        ret = z_erofs_lz4_decompress_mem(&ctx, dst + rq->pageofs_out);
        if (!dst_maptype)
-               kunmap_atomic(dst);
+               kunmap_local(dst);
        else if (dst_maptype == 2)
                vm_unmap_ram(dst, ctx.outpages);
        return ret;
@@ -328,7 +328,7 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
        const unsigned int lefthalf = rq->outputsize - righthalf;
        const unsigned int interlaced_offset =
                rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out;
-       unsigned char *src, *dst;
+       u8 *src;
 
        if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
                DBG_BUGON(1);
@@ -341,22 +341,19 @@ static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
        }
 
        src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in;
-       if (rq->out[0]) {
-               dst = kmap_local_page(rq->out[0]);
-               memcpy(dst + rq->pageofs_out, src + interlaced_offset,
-                      righthalf);
-               kunmap_local(dst);
-       }
+       if (rq->out[0])
+               memcpy_to_page(rq->out[0], rq->pageofs_out,
+                              src + interlaced_offset, righthalf);
 
        if (outpages > inpages) {
                DBG_BUGON(!rq->out[outpages - 1]);
                if (rq->out[outpages - 1] != rq->in[inpages - 1]) {
-                       dst = kmap_local_page(rq->out[outpages - 1]);
-                       memcpy(dst, interlaced_offset ? src :
-                                       (src + righthalf), lefthalf);
-                       kunmap_local(dst);
+                       memcpy_to_page(rq->out[outpages - 1], 0, src +
+                                       (interlaced_offset ? 0 : righthalf),
+                                      lefthalf);
                } else if (!interlaced_offset) {
                        memmove(src, src + righthalf, lefthalf);
+                       flush_dcache_page(rq->in[inpages - 1]);
                }
        }
        kunmap_local(src);
@@ -382,4 +379,10 @@ const struct z_erofs_decompressor erofs_decompressors[] = {
                .name = "lzma"
        },
 #endif
+#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
+       [Z_EROFS_COMPRESSION_DEFLATE] = {
+               .decompress = z_erofs_deflate_decompress,
+               .name = "deflate"
+       },
+#endif
 };
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
new file mode 100644 (file)
index 0000000..19e5bde
--- /dev/null
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/module.h>
+#include <linux/zlib.h>
+#include "compress.h"
+
+struct z_erofs_deflate {
+       struct z_erofs_deflate *next;
+       struct z_stream_s z;
+       u8 bounce[PAGE_SIZE];
+};
+
+static DEFINE_SPINLOCK(z_erofs_deflate_lock);
+static unsigned int z_erofs_deflate_nstrms, z_erofs_deflate_avail_strms;
+static struct z_erofs_deflate *z_erofs_deflate_head;
+static DECLARE_WAIT_QUEUE_HEAD(z_erofs_deflate_wq);
+
+module_param_named(deflate_streams, z_erofs_deflate_nstrms, uint, 0444);
+
+void z_erofs_deflate_exit(void)
+{
+       /* there should be no running fs instance */
+       while (z_erofs_deflate_avail_strms) {
+               struct z_erofs_deflate *strm;
+
+               spin_lock(&z_erofs_deflate_lock);
+               strm = z_erofs_deflate_head;
+               if (!strm) {
+                       spin_unlock(&z_erofs_deflate_lock);
+                       continue;
+               }
+               z_erofs_deflate_head = NULL;
+               spin_unlock(&z_erofs_deflate_lock);
+
+               while (strm) {
+                       struct z_erofs_deflate *n = strm->next;
+
+                       vfree(strm->z.workspace);
+                       kfree(strm);
+                       --z_erofs_deflate_avail_strms;
+                       strm = n;
+               }
+       }
+}
+
+int __init z_erofs_deflate_init(void)
+{
+       /* by default, use # of possible CPUs instead */
+       if (!z_erofs_deflate_nstrms)
+               z_erofs_deflate_nstrms = num_possible_cpus();
+
+       for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms;
+            ++z_erofs_deflate_avail_strms) {
+               struct z_erofs_deflate *strm;
+
+               strm = kzalloc(sizeof(*strm), GFP_KERNEL);
+               if (!strm)
+                       goto out_failed;
+
+               /* XXX: in-kernel zlib cannot shrink windowbits currently */
+               strm->z.workspace = vmalloc(zlib_inflate_workspacesize());
+               if (!strm->z.workspace) {
+                       kfree(strm);
+                       goto out_failed;
+               }
+
+               spin_lock(&z_erofs_deflate_lock);
+               strm->next = z_erofs_deflate_head;
+               z_erofs_deflate_head = strm;
+               spin_unlock(&z_erofs_deflate_lock);
+       }
+       return 0;
+
+out_failed:
+       pr_err("failed to allocate zlib workspace\n");
+       z_erofs_deflate_exit();
+       return -ENOMEM;
+}
+
+int z_erofs_load_deflate_config(struct super_block *sb,
+                               struct erofs_super_block *dsb,
+                               struct z_erofs_deflate_cfgs *dfl, int size)
+{
+       if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) {
+               erofs_err(sb, "invalid deflate cfgs, size=%u", size);
+               return -EINVAL;
+       }
+
+       if (dfl->windowbits > MAX_WBITS) {
+               erofs_err(sb, "unsupported windowbits %u", dfl->windowbits);
+               return -EOPNOTSUPP;
+       }
+
+       erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. Use at your own risk!");
+       return 0;
+}
+
+int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+                              struct page **pagepool)
+{
+       const unsigned int nrpages_out =
+               PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+       const unsigned int nrpages_in =
+               PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+       struct super_block *sb = rq->sb;
+       unsigned int insz, outsz, pofs;
+       struct z_erofs_deflate *strm;
+       u8 *kin, *kout = NULL;
+       bool bounced = false;
+       int no = -1, ni = 0, j = 0, zerr, err;
+
+       /* 1. get the exact DEFLATE compressed size */
+       kin = kmap_local_page(*rq->in);
+       err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
+                       min_t(unsigned int, rq->inputsize,
+                             sb->s_blocksize - rq->pageofs_in));
+       if (err) {
+               kunmap_local(kin);
+               return err;
+       }
+
+       /* 2. get an available DEFLATE context */
+again:
+       spin_lock(&z_erofs_deflate_lock);
+       strm = z_erofs_deflate_head;
+       if (!strm) {
+               spin_unlock(&z_erofs_deflate_lock);
+               wait_event(z_erofs_deflate_wq, READ_ONCE(z_erofs_deflate_head));
+               goto again;
+       }
+       z_erofs_deflate_head = strm->next;
+       spin_unlock(&z_erofs_deflate_lock);
+
+       /* 3. multi-call decompress */
+       insz = rq->inputsize;
+       outsz = rq->outputsize;
+       zerr = zlib_inflateInit2(&strm->z, -MAX_WBITS);
+       if (zerr != Z_OK) {
+               err = -EIO;
+               goto failed_zinit;
+       }
+
+       pofs = rq->pageofs_out;
+       strm->z.avail_in = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
+       insz -= strm->z.avail_in;
+       strm->z.next_in = kin + rq->pageofs_in;
+       strm->z.avail_out = 0;
+
+       while (1) {
+               if (!strm->z.avail_out) {
+                       if (++no >= nrpages_out || !outsz) {
+                               erofs_err(sb, "insufficient space for decompressed data");
+                               err = -EFSCORRUPTED;
+                               break;
+                       }
+
+                       if (kout)
+                               kunmap_local(kout);
+                       strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
+                       outsz -= strm->z.avail_out;
+                       if (!rq->out[no]) {
+                               rq->out[no] = erofs_allocpage(pagepool,
+                                               GFP_KERNEL | __GFP_NOFAIL);
+                               set_page_private(rq->out[no],
+                                                Z_EROFS_SHORTLIVED_PAGE);
+                       }
+                       kout = kmap_local_page(rq->out[no]);
+                       strm->z.next_out = kout + pofs;
+                       pofs = 0;
+               }
+
+               if (!strm->z.avail_in && insz) {
+                       if (++ni >= nrpages_in) {
+                               erofs_err(sb, "invalid compressed data");
+                               err = -EFSCORRUPTED;
+                               break;
+                       }
+
+               if (kout) { /* kmap_local mappings must be unmapped in reverse order */
+                               j = strm->z.next_out - kout;
+                               kunmap_local(kout);
+                       }
+                       kunmap_local(kin);
+                       strm->z.avail_in = min_t(u32, insz, PAGE_SIZE);
+                       insz -= strm->z.avail_in;
+                       kin = kmap_local_page(rq->in[ni]);
+                       strm->z.next_in = kin;
+                       bounced = false;
+                       if (kout) {
+                               kout = kmap_local_page(rq->out[no]);
+                               strm->z.next_out = kout + j;
+                       }
+               }
+
+               /*
+                * Handle overlapping: use the bounce buffer if the compressed
+                * data is currently being processed in place; otherwise take
+                * short-lived pages from the on-stack pagepool, which can be
+                * shared within the same request, so that not _all_ in-place
+                * I/O pages need to be duplicated.
+                */
+               if (!bounced && rq->out[no] == rq->in[ni]) {
+                       memcpy(strm->bounce, strm->z.next_in, strm->z.avail_in);
+                       strm->z.next_in = strm->bounce;
+                       bounced = true;
+               }
+
+               for (j = ni + 1; j < nrpages_in; ++j) {
+                       struct page *tmppage;
+
+                       if (rq->out[no] != rq->in[j])
+                               continue;
+
+                       DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
+                                                       rq->in[j]));
+                       tmppage = erofs_allocpage(pagepool,
+                                                 GFP_KERNEL | __GFP_NOFAIL);
+                       set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
+                       copy_highpage(tmppage, rq->in[j]);
+                       rq->in[j] = tmppage;
+               }
+
+               zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH);
+               if (zerr != Z_OK || !(outsz + strm->z.avail_out)) {
+                       if (zerr == Z_OK && rq->partial_decoding)
+                               break;
+                       if (zerr == Z_STREAM_END && !outsz)
+                               break;
+                       erofs_err(sb, "failed to decompress %d in[%u] out[%u]",
+                                 zerr, rq->inputsize, rq->outputsize);
+                       err = -EFSCORRUPTED;
+                       break;
+               }
+       }
+
+       if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
+               err = -EIO;
+       if (kout)
+               kunmap_local(kout);
+failed_zinit:
+       kunmap_local(kin);
+       /* 4. push back DEFLATE stream context to the global list */
+       spin_lock(&z_erofs_deflate_lock);
+       strm->next = z_erofs_deflate_head;
+       z_erofs_deflate_head = strm;
+       spin_unlock(&z_erofs_deflate_lock);
+       wake_up(&z_erofs_deflate_wq);
+       return err;
+}
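
For reference, the function above combines two mechanisms: a fixed pool of
pre-allocated stream contexts handed out under z_erofs_deflate_lock (callers
sleep on z_erofs_deflate_wq until step 4 pushes a context back), and
multi-call raw inflate, where the negative windowBits passed to
zlib_inflateInit2() selects a headerless DEFLATE stream. A minimal user-space
sketch of the raw multi-call decode against ordinary zlib (build with -lz);
inflate_raw() is an invented helper name, not kernel code:

    #include <string.h>
    #include <zlib.h>

    /* decode a raw (headerless) DEFLATE stream in one shot; 0 on success */
    static int inflate_raw(const unsigned char *in, size_t insz,
                           unsigned char *out, size_t outsz)
    {
        z_stream z;
        int zerr;

        memset(&z, 0, sizeof(z));
        /* negative windowBits: raw DEFLATE, like -MAX_WBITS above */
        if (inflateInit2(&z, -MAX_WBITS) != Z_OK)
            return -1;
        z.next_in = (Bytef *)in;
        z.avail_in = insz;
        z.next_out = out;
        z.avail_out = outsz;
        do {
            /* the kernel loop refills input/output one page at a time instead */
            zerr = inflate(&z, Z_SYNC_FLUSH);
        } while (zerr == Z_OK && z.avail_out);
        inflateEnd(&z);
        return (zerr == Z_STREAM_END || (zerr == Z_OK && !z.avail_out)) ? 0 : -1;
    }
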
index 2c7b16e..a03ec70 100644 (file)
@@ -13,6 +13,7 @@
 
 #define EROFS_FEATURE_COMPAT_SB_CHKSUM          0x00000001
 #define EROFS_FEATURE_COMPAT_MTIME              0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_FILTER      0x00000004
 
 /*
  * Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -81,7 +82,8 @@ struct erofs_super_block {
        __u8 xattr_prefix_count;        /* # of long xattr name prefixes */
        __le32 xattr_prefix_start;      /* start of long xattr prefixes */
        __le64 packed_nid;      /* nid of the special packed inode */
-       __u8 reserved2[24];
+       __u8 xattr_filter_reserved; /* reserved for xattr name filter */
+       __u8 reserved2[23];
 };
 
 /*
@@ -200,7 +202,7 @@ struct erofs_inode_extended {
  * for read-only fs, no need to introduce h_refcount
  */
 struct erofs_xattr_ibody_header {
-       __le32 h_reserved;
+       __le32 h_name_filter;           /* bit value 1 indicates not-present */
        __u8   h_shared_count;
        __u8   h_reserved2[7];
        __le32 h_shared_xattrs[];       /* shared xattr id array */
@@ -221,6 +223,10 @@ struct erofs_xattr_ibody_header {
 #define EROFS_XATTR_LONG_PREFIX                0x80
 #define EROFS_XATTR_LONG_PREFIX_MASK   0x7f
 
+#define EROFS_XATTR_FILTER_BITS                32
+#define EROFS_XATTR_FILTER_DEFAULT     UINT32_MAX
+#define EROFS_XATTR_FILTER_SEED                0x25BBE08F
+
 /* xattr entry (for both inline & shared xattrs) */
 struct erofs_xattr_entry {
        __u8   e_name_len;      /* length of name */
@@ -289,6 +295,7 @@ struct erofs_dirent {
 enum {
        Z_EROFS_COMPRESSION_LZ4         = 0,
        Z_EROFS_COMPRESSION_LZMA        = 1,
+       Z_EROFS_COMPRESSION_DEFLATE     = 2,
        Z_EROFS_COMPRESSION_MAX
 };
 #define Z_EROFS_ALL_COMPR_ALGS         ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
@@ -309,6 +316,12 @@ struct z_erofs_lzma_cfgs {
 
 #define Z_EROFS_LZMA_MAX_DICT_SIZE     (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
 
+/* 6 bytes (+ length field = 8 bytes) */
+struct z_erofs_deflate_cfgs {
+       u8 windowbits;                  /* 8..15 for DEFLATE */
+       u8 reserved[5];
+} __packed;
+
 /*
  * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
  *  e.g. for 4k logical cluster size,      4B        if compacted 2B is off;
index d70b12b..edc8ec7 100644 (file)
@@ -105,8 +105,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                set_nlink(inode, le32_to_cpu(die->i_nlink));
 
                /* extended inode has its own timestamp */
-               inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
-               inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
+               inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
+                               le32_to_cpu(die->i_mtime_nsec));
 
                inode->i_size = le64_to_cpu(die->i_size);
 
@@ -148,8 +148,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                set_nlink(inode, le16_to_cpu(dic->i_nlink));
 
                /* use build time for compact inodes */
-               inode->i_ctime.tv_sec = sbi->build_time;
-               inode->i_ctime.tv_nsec = sbi->build_time_nsec;
+               inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
 
                inode->i_size = le32_to_cpu(dic->i_size);
                if (erofs_inode_is_data_compressed(vi->datalayout))
@@ -176,14 +175,12 @@ static void *erofs_read_inode(struct erofs_buf *buf,
                vi->chunkbits = sb->s_blocksize_bits +
                        (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
        }
-       inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
-       inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
-       inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
-       inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
+       inode->i_mtime = inode->i_atime = inode_get_ctime(inode);
 
        inode->i_flags &= ~S_DAX;
        if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
-           vi->datalayout == EROFS_INODE_FLAT_PLAIN)
+           (vi->datalayout == EROFS_INODE_FLAT_PLAIN ||
+            vi->datalayout == EROFS_INODE_CHUNK_BASED))
                inode->i_flags |= S_DAX;
 
        if (!nblks)
@@ -372,7 +369,7 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
        stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
                                  STATX_ATTR_IMMUTABLE);
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
        return 0;
 }
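
The timestamp changes in this hunk (and in the exfat/ext2 hunks further down)
migrate from open-coded i_ctime stores to the VFS ctime accessors introduced
for 6.6. Roughly, the helper signatures are as below; the exact prototypes
are quoted from memory, so treat them as an assumption and check
include/linux/fs.h:

    struct timespec64 inode_get_ctime(const struct inode *inode);
    struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
                                            struct timespec64 ts);
    struct timespec64 inode_set_ctime(struct inode *inode,
                                      time64_t sec, long nsec);
    struct timespec64 inode_set_ctime_current(struct inode *inode);
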
 
index 36e32fa..4ff88d0 100644 (file)
@@ -151,6 +151,7 @@ struct erofs_sb_info {
        u32 xattr_prefix_start;
        u8 xattr_prefix_count;
        struct erofs_xattr_prefix_item *xattr_prefixes;
+       unsigned int xattr_filter_reserved;
 #endif
        u16 device_id_mask;     /* valid bits of device id to be used */
 
@@ -251,6 +252,7 @@ EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
 EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
 EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
 EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
+EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
 
 /* atomic flag definitions */
 #define EROFS_I_EA_INITED_BIT  0
@@ -270,6 +272,7 @@ struct erofs_inode {
        unsigned char inode_isize;
        unsigned int xattr_isize;
 
+       unsigned int xattr_name_filter;
        unsigned int xattr_shared_count;
        unsigned int *xattr_shared_xattrs;
 
@@ -519,6 +522,26 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
 }
 #endif /* !CONFIG_EROFS_FS_ZIP_LZMA */
 
+#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
+int __init z_erofs_deflate_init(void);
+void z_erofs_deflate_exit(void);
+int z_erofs_load_deflate_config(struct super_block *sb,
+                               struct erofs_super_block *dsb,
+                               struct z_erofs_deflate_cfgs *dfl, int size);
+#else
+static inline int z_erofs_deflate_init(void) { return 0; }
+static inline int z_erofs_deflate_exit(void) { return 0; }
+static inline int z_erofs_load_deflate_config(struct super_block *sb,
+                       struct erofs_super_block *dsb,
+                       struct z_erofs_deflate_cfgs *dfl, int size) {
+       if (dfl) {
+               erofs_err(sb, "deflate algorithm isn't enabled");
+               return -EINVAL;
+       }
+       return 0;
+}
+#endif /* !CONFIG_EROFS_FS_ZIP_DEFLATE */
+
 #ifdef CONFIG_EROFS_FS_ONDEMAND
 int erofs_fscache_register_fs(struct super_block *sb);
 void erofs_fscache_unregister_fs(struct super_block *sb);
index 9d6a3c6..44a24d5 100644 (file)
 #include <trace/events/erofs.h>
 
 static struct kmem_cache *erofs_inode_cachep __read_mostly;
-struct file_system_type erofs_fs_type;
 
-void _erofs_err(struct super_block *sb, const char *function,
-               const char *fmt, ...)
+void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
 {
        struct va_format vaf;
        va_list args;
@@ -32,12 +30,11 @@ void _erofs_err(struct super_block *sb, const char *function,
        vaf.fmt = fmt;
        vaf.va = &args;
 
-       pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
+       pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
        va_end(args);
 }
 
-void _erofs_info(struct super_block *sb, const char *function,
-                const char *fmt, ...)
+void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...)
 {
        struct va_format vaf;
        va_list args;
@@ -102,11 +99,9 @@ static void erofs_free_inode(struct inode *inode)
 {
        struct erofs_inode *vi = EROFS_I(inode);
 
-       /* be careful of RCU symlink path */
        if (inode->i_op == &erofs_fast_symlink_iops)
                kfree(inode->i_link);
        kfree(vi->xattr_shared_xattrs);
-
        kmem_cache_free(erofs_inode_cachep, vi);
 }
 
@@ -119,8 +114,7 @@ static bool check_layout_compatibility(struct super_block *sb,
 
        /* check if current kernel meets all mandatory requirements */
        if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
-               erofs_err(sb,
-                         "unidentified incompatible feature %x, please upgrade kernel version",
+               erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
                           feature & ~EROFS_ALL_FEATURE_INCOMPAT);
                return false;
        }
@@ -201,6 +195,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
                case Z_EROFS_COMPRESSION_LZMA:
                        ret = z_erofs_load_lzma_config(sb, dsb, data, size);
                        break;
+               case Z_EROFS_COMPRESSION_DEFLATE:
+                       ret = z_erofs_load_deflate_config(sb, dsb, data, size);
+                       break;
                default:
                        DBG_BUGON(1);
                        ret = -EFAULT;
@@ -388,6 +385,7 @@ static int erofs_read_superblock(struct super_block *sb)
        sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
        sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
        sbi->xattr_prefix_count = dsb->xattr_prefix_count;
+       sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
 #endif
        sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
        sbi->root_nid = le16_to_cpu(dsb->root_nid);
@@ -420,16 +418,11 @@ static int erofs_read_superblock(struct super_block *sb)
 
        if (erofs_is_fscache_mode(sb))
                erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
-       if (erofs_sb_has_fragments(sbi))
-               erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
-       if (erofs_sb_has_dedupe(sbi))
-               erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
 out:
        erofs_put_metabuf(&buf);
        return ret;
 }
 
-/* set up default EROFS parameters */
 static void erofs_default_options(struct erofs_fs_context *ctx)
 {
 #ifdef CONFIG_EROFS_FS_ZIP
@@ -731,7 +724,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
        xa_init(&sbi->managed_pslots);
 #endif
 
-       /* get the root inode */
        inode = erofs_iget(sb, ROOT_NID(sbi));
        if (IS_ERR(inode))
                return PTR_ERR(inode);
@@ -748,7 +740,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
                return -ENOMEM;
 
        erofs_shrinker_register(sb);
-       /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
        if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
                sbi->packed_inode = erofs_iget(sb, sbi->packed_nid);
                if (IS_ERR(sbi->packed_inode)) {
@@ -881,16 +872,10 @@ static int erofs_init_fs_context(struct fs_context *fc)
        return 0;
 }
 
-/*
- * could be triggered after deactivate_locked_super()
- * is called, thus including umount and failed to initialize.
- */
 static void erofs_kill_sb(struct super_block *sb)
 {
        struct erofs_sb_info *sbi;
 
-       WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
-
        /* pseudo mount for anon inodes */
        if (sb->s_flags & SB_KERNMOUNT) {
                kill_anon_super(sb);
@@ -915,7 +900,6 @@ static void erofs_kill_sb(struct super_block *sb)
        sb->s_fs_info = NULL;
 }
 
-/* called when ->s_root is non-NULL */
 static void erofs_put_super(struct super_block *sb)
 {
        struct erofs_sb_info *const sbi = EROFS_SB(sb);
@@ -952,9 +936,9 @@ static int __init erofs_module_init(void)
        erofs_check_ondisk_layout_definitions();
 
        erofs_inode_cachep = kmem_cache_create("erofs_inode",
-                                              sizeof(struct erofs_inode), 0,
-                                              SLAB_RECLAIM_ACCOUNT,
-                                              erofs_inode_init_once);
+                       sizeof(struct erofs_inode), 0,
+                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+                       erofs_inode_init_once);
        if (!erofs_inode_cachep)
                return -ENOMEM;
 
@@ -966,6 +950,10 @@ static int __init erofs_module_init(void)
        if (err)
                goto lzma_err;
 
+       err = z_erofs_deflate_init();
+       if (err)
+               goto deflate_err;
+
        erofs_pcpubuf_init();
        err = z_erofs_init_zip_subsystem();
        if (err)
@@ -986,6 +974,8 @@ fs_err:
 sysfs_err:
        z_erofs_exit_zip_subsystem();
 zip_err:
+       z_erofs_deflate_exit();
+deflate_err:
        z_erofs_lzma_exit();
 lzma_err:
        erofs_exit_shrinker();
@@ -1003,13 +993,13 @@ static void __exit erofs_module_exit(void)
 
        erofs_exit_sysfs();
        z_erofs_exit_zip_subsystem();
+       z_erofs_deflate_exit();
        z_erofs_lzma_exit();
        erofs_exit_shrinker();
        kmem_cache_destroy(erofs_inode_cachep);
        erofs_pcpubuf_exit();
 }
 
-/* get filesystem statistics */
 static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct super_block *sb = dentry->d_sb;
index 40178b6..09d3416 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) 2021-2022, Alibaba Cloud
  */
 #include <linux/security.h>
+#include <linux/xxhash.h>
 #include "xattr.h"
 
 struct erofs_xattr_iter {
@@ -87,6 +88,7 @@ static int erofs_init_inode_xattrs(struct inode *inode)
        }
 
        ih = it.kaddr + erofs_blkoff(sb, it.pos);
+       vi->xattr_name_filter = le32_to_cpu(ih->h_name_filter);
        vi->xattr_shared_count = ih->h_shared_count;
        vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
                                                sizeof(uint), GFP_KERNEL);
@@ -392,7 +394,10 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
                   void *buffer, size_t buffer_size)
 {
        int ret;
+       unsigned int hashbit;
        struct erofs_xattr_iter it;
+       struct erofs_inode *vi = EROFS_I(inode);
+       struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
 
        if (!name)
                return -EINVAL;
@@ -401,6 +406,15 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
        if (ret)
                return ret;
 
+       /* the reserved flag is non-zero if the on-disk filter format has changed */
+       if (erofs_sb_has_xattr_filter(sbi) && !sbi->xattr_filter_reserved) {
+               hashbit = xxh32(name, strlen(name),
+                               EROFS_XATTR_FILTER_SEED + index);
+               hashbit &= EROFS_XATTR_FILTER_BITS - 1;
+               if (vi->xattr_name_filter & (1U << hashbit))
+                       return -ENOATTR;
+       }
+
        it.index = index;
        it.name = (struct qstr)QSTR_INIT(name, strlen(name));
        if (it.name.len > EROFS_NAME_LEN)
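
The xattr.c hunks above are the lookup half of the new per-inode name filter:
h_name_filter is a 32-bit Bloom-style map in which a set bit means the
(index, name) pair is definitely absent, letting erofs_getxattr() return
-ENOATTR without scanning the xattr body, while a clear bit only means
"maybe present". A sketch of both halves using the user-space xxHash library,
whose XXH32() has the same contract as the kernel's xxh32();
filter_add()/filter_may_have() are invented names, and the builder-side half
is an assumption about how the image builder populates the field:

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>
    #include <xxhash.h>               /* xxHash library, link with -lxxhash */

    #define FILTER_BITS     32
    #define FILTER_SEED     0x25BBE08F    /* EROFS_XATTR_FILTER_SEED */
    #define FILTER_DEFAULT  UINT32_MAX    /* all bits set: no xattrs recorded */

    /* builder side (assumed): clear one bit per xattr the inode really has */
    static uint32_t filter_add(uint32_t filter, int index, const char *name)
    {
        uint32_t bit = XXH32(name, strlen(name), FILTER_SEED + index) &
                       (FILTER_BITS - 1);
        return filter & ~(1U << bit);
    }

    /* lookup side, mirroring the erofs_getxattr() check above */
    static bool filter_may_have(uint32_t filter, int index, const char *name)
    {
        uint32_t bit = XXH32(name, strlen(name), FILTER_SEED + index) &
                       (FILTER_BITS - 1);
        return !(filter & (1U << bit));   /* set bit => definitely absent */
    }
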
index 5f1890e..036f610 100644 (file)
@@ -143,22 +143,17 @@ static inline void z_erofs_onlinepage_split(struct page *page)
        atomic_inc((atomic_t *)&page->private);
 }
 
-static inline void z_erofs_page_mark_eio(struct page *page)
+static void z_erofs_onlinepage_endio(struct page *page, int err)
 {
-       int orig;
+       int orig, v;
+
+       DBG_BUGON(!PagePrivate(page));
 
        do {
                orig = atomic_read((atomic_t *)&page->private);
-       } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
-                               orig | Z_EROFS_PAGE_EIO) != orig);
-}
-
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
-       unsigned int v;
+               v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0);
+       } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig);
 
-       DBG_BUGON(!PagePrivate(page));
-       v = atomic_dec_return((atomic_t *)&page->private);
        if (!(v & ~Z_EROFS_PAGE_EIO)) {
                set_page_private(page, 0);
                ClearPagePrivate(page);
@@ -507,19 +502,17 @@ enum z_erofs_pclustermode {
         */
        Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
        /*
-        * The current collection has been linked with the owned chain, and
-        * could also be linked with the remaining collections, which means
-        * if the processing page is the tail page of the collection, thus
-        * the current collection can safely use the whole page (since
-        * the previous collection is under control) for in-place I/O, as
-        * illustrated below:
-        *  ________________________________________________________________
-        * |  tail (partial) page |          head (partial) page           |
-        * |  (of the current cl) |      (of the previous collection)      |
-        * |                      |                                        |
-        * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________|
+        * The pcluster was just linked to a decompression chain by us.  It can
+        * also be linked with the remaining pclusters, which means if the
+        * processing page is the tail page of a pcluster, this pcluster can
+        * safely use the whole page (since the previous pcluster is within the
+        * same chain) for in-place I/O, as illustrated below:
+        *  ___________________________________________________
+        * |  tail (partial) page  |    head (partial) page    |
+        * |  (of the current pcl) |   (of the previous pcl)   |
+        * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
         *
-        * [  (*) the above page can be used as inplace I/O.               ]
+        * [  (*) the page above can be used as inplace I/O.   ]
         */
        Z_EROFS_PCLUSTER_FOLLOWED,
 };
@@ -535,8 +528,6 @@ struct z_erofs_decompress_frontend {
        z_erofs_next_pcluster_t owned_head;
        enum z_erofs_pclustermode mode;
 
-       /* used for applying cache strategy on the fly */
-       bool backmost;
        erofs_off_t headoffset;
 
        /* a pointer used to pick up inplace I/O pages */
@@ -545,7 +536,7 @@ struct z_erofs_decompress_frontend {
 
 #define DECOMPRESS_FRONTEND_INIT(__i) { \
        .inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
-       .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
+       .mode = Z_EROFS_PCLUSTER_FOLLOWED }
 
 static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
 {
@@ -554,7 +545,7 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
        if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
                return false;
 
-       if (fe->backmost)
+       if (!(fe->map.m_flags & EROFS_MAP_FULL_MAPPED))
                return true;
 
        if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
@@ -851,9 +842,11 @@ err_out:
        return err;
 }
 
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
 {
        struct erofs_map_blocks *map = &fe->map;
+       struct super_block *sb = fe->inode->i_sb;
+       erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
        struct erofs_workgroup *grp = NULL;
        int ret;
 
@@ -863,8 +856,7 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
        DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
 
        if (!(map->m_flags & EROFS_MAP_META)) {
-               grp = erofs_find_workgroup(fe->inode->i_sb,
-                                          map->m_pa >> PAGE_SHIFT);
+               grp = erofs_find_workgroup(sb, blknr);
        } else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
                DBG_BUGON(1);
                return -EFSCORRUPTED;
@@ -883,9 +875,26 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
        } else if (ret) {
                return ret;
        }
+
        z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
                                Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
-       /* since file-backed online pages are traversed in reverse order */
+       if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+               /* bind cache first when cached decompression is preferred */
+               z_erofs_bind_cache(fe);
+       } else {
+               void *mptr;
+
+               mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP);
+               if (IS_ERR(mptr)) {
+                       ret = PTR_ERR(mptr);
+                       erofs_err(sb, "failed to get inline data %d", ret);
+                       return ret;
+               }
+               get_page(map->buf.page);
+               WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
+               fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
+       }
+       /* file-backed inplace I/O pages are traversed in reverse order */
        fe->icur = z_erofs_pclusterpages(fe->pcl);
        return 0;
 }
@@ -908,12 +917,12 @@ void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
        call_rcu(&pcl->rcu, z_erofs_rcu_callback);
 }
 
-static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
 {
        struct z_erofs_pcluster *pcl = fe->pcl;
 
        if (!pcl)
-               return false;
+               return;
 
        z_erofs_bvec_iter_end(&fe->biter);
        mutex_unlock(&pcl->lock);
@@ -929,37 +938,29 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
                erofs_workgroup_put(&pcl->obj);
 
        fe->pcl = NULL;
-       return true;
 }
 
-static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
-                                struct page *page, unsigned int pageofs,
-                                unsigned int len)
+static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
+                       unsigned int cur, unsigned int end, erofs_off_t pos)
 {
-       struct super_block *sb = inode->i_sb;
-       struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
+       struct inode *packed_inode = EROFS_SB(sb)->packed_inode;
        struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
-       u8 *src, *dst;
-       unsigned int i, cnt;
+       unsigned int cnt;
+       u8 *src;
 
        if (!packed_inode)
                return -EFSCORRUPTED;
 
        buf.inode = packed_inode;
-       pos += EROFS_I(inode)->z_fragmentoff;
-       for (i = 0; i < len; i += cnt) {
-               cnt = min_t(unsigned int, len - i,
+       for (; cur < end; cur += cnt, pos += cnt) {
+               cnt = min_t(unsigned int, end - cur,
                            sb->s_blocksize - erofs_blkoff(sb, pos));
                src = erofs_bread(&buf, erofs_blknr(sb, pos), EROFS_KMAP);
                if (IS_ERR(src)) {
                        erofs_put_metabuf(&buf);
                        return PTR_ERR(src);
                }
-
-               dst = kmap_local_page(page);
-               memcpy(dst + pageofs + i, src + erofs_blkoff(sb, pos), cnt);
-               kunmap_local(dst);
-               pos += cnt;
+               memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt);
        }
        erofs_put_metabuf(&buf);
        return 0;
@@ -972,94 +973,60 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
        struct erofs_map_blocks *const map = &fe->map;
        const loff_t offset = page_offset(page);
        bool tight = true, exclusive;
-       unsigned int cur, end, spiltted;
+       unsigned int cur, end, len, split;
        int err = 0;
 
-       /* register locked file pages as online pages in pack */
        z_erofs_onlinepage_init(page);
 
-       spiltted = 0;
+       split = 0;
        end = PAGE_SIZE;
 repeat:
-       cur = end - 1;
-
-       if (offset + cur < map->m_la ||
-           offset + cur >= map->m_la + map->m_llen) {
-               if (z_erofs_collector_end(fe))
-                       fe->backmost = false;
-               map->m_la = offset + cur;
+       if (offset + end - 1 < map->m_la ||
+           offset + end - 1 >= map->m_la + map->m_llen) {
+               z_erofs_pcluster_end(fe);
+               map->m_la = offset + end - 1;
                map->m_llen = 0;
                err = z_erofs_map_blocks_iter(inode, map, 0);
                if (err)
                        goto out;
-       } else {
-               if (fe->pcl)
-                       goto hitted;
-               /* didn't get a valid pcluster previously (very rare) */
        }
 
-       if (!(map->m_flags & EROFS_MAP_MAPPED) ||
-           map->m_flags & EROFS_MAP_FRAGMENT)
-               goto hitted;
-
-       err = z_erofs_collector_begin(fe);
-       if (err)
-               goto out;
+       cur = offset > map->m_la ? 0 : map->m_la - offset;
+       /* bump the split count first to avoid handling several special cases */
+       ++split;
 
-       if (z_erofs_is_inline_pcluster(fe->pcl)) {
-               void *mp;
-
-               mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
-                                       erofs_blknr(inode->i_sb, map->m_pa),
-                                       EROFS_NO_KMAP);
-               if (IS_ERR(mp)) {
-                       err = PTR_ERR(mp);
-                       erofs_err(inode->i_sb,
-                                 "failed to get inline page, err %d", err);
-                       goto out;
-               }
-               get_page(fe->map.buf.page);
-               WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
-                          fe->map.buf.page);
-               fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
-       } else {
-               /* bind cache first when cached decompression is preferred */
-               z_erofs_bind_cache(fe);
-       }
-hitted:
-       /*
-        * Ensure the current partial page belongs to this submit chain rather
-        * than other concurrent submit chains or the noio(bypass) chain since
-        * those chains are handled asynchronously thus the page cannot be used
-        * for inplace I/O or bvpage (should be processed in a strict order.)
-        */
-       tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
-
-       cur = end - min_t(unsigned int, offset + end - map->m_la, end);
        if (!(map->m_flags & EROFS_MAP_MAPPED)) {
                zero_user_segment(page, cur, end);
+               tight = false;
                goto next_part;
        }
+
        if (map->m_flags & EROFS_MAP_FRAGMENT) {
-               unsigned int pageofs, skip, len;
+               erofs_off_t fpos = offset + cur - map->m_la;
 
-               if (offset > map->m_la) {
-                       pageofs = 0;
-                       skip = offset - map->m_la;
-               } else {
-                       pageofs = map->m_la & ~PAGE_MASK;
-                       skip = 0;
-               }
-               len = min_t(unsigned int, map->m_llen - skip, end - cur);
-               err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
+               len = min_t(unsigned int, map->m_llen - fpos, end - cur);
+               err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len,
+                               EROFS_I(inode)->z_fragmentoff + fpos);
                if (err)
                        goto out;
-               ++spiltted;
                tight = false;
                goto next_part;
        }
 
-       exclusive = (!cur && (!spiltted || tight));
+       if (!fe->pcl) {
+               err = z_erofs_pcluster_begin(fe);
+               if (err)
+                       goto out;
+       }
+
+       /*
+        * Ensure the current partial page belongs to this submit chain rather
+        * than other concurrent submit chains or the noio(bypass) chain, since
+        * those chains are handled asynchronously and thus the page cannot be
+        * used for inplace I/O or bvpage (which must be processed in strict
+        * order).
+        */
+       tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+       exclusive = (!cur && ((split <= 1) || tight));
        if (cur)
                tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
 
@@ -1072,8 +1039,6 @@ hitted:
                goto out;
 
        z_erofs_onlinepage_split(page);
-       /* bump up the number of spiltted parts of a page */
-       ++spiltted;
        if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
                fe->pcl->multibases = true;
        if (fe->pcl->length < offset + end - map->m_la) {
@@ -1094,9 +1059,7 @@ next_part:
                goto repeat;
 
 out:
-       if (err)
-               z_erofs_page_mark_eio(page);
-       z_erofs_onlinepage_endio(page);
+       z_erofs_onlinepage_endio(page, err);
        return err;
 }
 
@@ -1144,10 +1107,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
                                         struct z_erofs_bvec *bvec)
 {
        struct z_erofs_bvec_item *item;
+       unsigned int pgnr;
 
-       if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
-               unsigned int pgnr;
-
+       if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) &&
+           (bvec->end == PAGE_SIZE ||
+            bvec->offset + bvec->end == be->pcl->length)) {
                pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
                DBG_BUGON(pgnr >= be->nr_pages);
                if (!be->decompressed_pages[pgnr]) {
@@ -1198,9 +1162,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
                        cur += len;
                }
                kunmap_local(dst);
-               if (err)
-                       z_erofs_page_mark_eio(bvi->bvec.page);
-               z_erofs_onlinepage_endio(bvi->bvec.page);
+               z_erofs_onlinepage_endio(bvi->bvec.page, err);
                list_del(p);
                kfree(bvi);
        }
@@ -1371,9 +1333,7 @@ out:
                /* recycle all individual short-lived pages */
                if (z_erofs_put_shortlivedpage(be->pagepool, page))
                        continue;
-               if (err)
-                       z_erofs_page_mark_eio(page);
-               z_erofs_onlinepage_endio(page);
+               z_erofs_onlinepage_endio(page, err);
        }
 
        if (be->decompressed_pages != be->onstack_pages)
@@ -1409,7 +1369,10 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
                owned = READ_ONCE(be.pcl->next);
 
                z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
-               erofs_workgroup_put(&be.pcl->obj);
+               if (z_erofs_is_inline_pcluster(be.pcl))
+                       z_erofs_free_pcluster(be.pcl);
+               else
+                       erofs_workgroup_put(&be.pcl->obj);
        }
 }
 
@@ -1841,21 +1804,16 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
        }
 
        cur = map->m_la + map->m_llen - 1;
-       while (cur >= end) {
+       while ((cur >= end) && (cur < i_size_read(inode))) {
                pgoff_t index = cur >> PAGE_SHIFT;
                struct page *page;
 
                page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
                if (page) {
-                       if (PageUptodate(page)) {
+                       if (PageUptodate(page))
                                unlock_page(page);
-                       } else {
-                               err = z_erofs_do_read_page(f, page);
-                               if (err)
-                                       erofs_err(inode->i_sb,
-                                                 "readmore error at page %lu @ nid %llu",
-                                                 index, EROFS_I(inode)->nid);
-                       }
+                       else
+                               (void)z_erofs_do_read_page(f, page);
                        put_page(page);
                }
 
@@ -1867,25 +1825,25 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
 
 static int z_erofs_read_folio(struct file *file, struct folio *folio)
 {
-       struct page *page = &folio->page;
-       struct inode *const inode = page->mapping->host;
+       struct inode *const inode = folio->mapping->host;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
        int err;
 
-       trace_erofs_readpage(page, false);
-       f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
+       trace_erofs_read_folio(folio, false);
+       f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
 
        z_erofs_pcluster_readmore(&f, NULL, true);
-       err = z_erofs_do_read_page(&f, page);
+       err = z_erofs_do_read_page(&f, &folio->page);
        z_erofs_pcluster_readmore(&f, NULL, false);
-       (void)z_erofs_collector_end(&f);
+       z_erofs_pcluster_end(&f);
 
        /* if some compressed cluster ready, need submit them anyway */
        z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
 
-       if (err)
-               erofs_err(inode->i_sb, "failed to read, err [%d]", err);
+       if (err && err != -EINTR)
+               erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
+                         err, folio->index, EROFS_I(inode)->nid);
 
        erofs_put_metabuf(&f.map.buf);
        erofs_release_pages(&f.pagepool);
@@ -1897,38 +1855,35 @@ static void z_erofs_readahead(struct readahead_control *rac)
        struct inode *const inode = rac->mapping->host;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
-       struct page *head = NULL, *page;
-       unsigned int nr_pages;
+       struct folio *head = NULL, *folio;
+       unsigned int nr_folios;
+       int err;
 
        f.headoffset = readahead_pos(rac);
 
        z_erofs_pcluster_readmore(&f, rac, true);
-       nr_pages = readahead_count(rac);
-       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
+       nr_folios = readahead_count(rac);
+       trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
 
-       while ((page = readahead_page(rac))) {
-               set_page_private(page, (unsigned long)head);
-               head = page;
+       while ((folio = readahead_folio(rac))) {
+               folio->private = head;
+               head = folio;
        }
 
+       /* traverse in reverse order for best metadata I/O performance */
        while (head) {
-               struct page *page = head;
-               int err;
-
-               /* traversal in reverse order */
-               head = (void *)page_private(page);
+               folio = head;
+               head = folio_get_private(folio);
 
-               err = z_erofs_do_read_page(&f, page);
-               if (err)
-                       erofs_err(inode->i_sb,
-                                 "readahead error at page %lu @ nid %llu",
-                                 page->index, EROFS_I(inode)->nid);
-               put_page(page);
+               err = z_erofs_do_read_page(&f, &folio->page);
+               if (err && err != -EINTR)
+                       erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
+                                 folio->index, EROFS_I(inode)->nid);
        }
        z_erofs_pcluster_readmore(&f, rac, false);
-       (void)z_erofs_collector_end(&f);
+       z_erofs_pcluster_end(&f);
 
-       z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true);
+       z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true);
        erofs_put_metabuf(&f.map.buf);
        erofs_release_pages(&f.pagepool);
 }
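
One detail worth calling out in the zdata.c changes: z_erofs_onlinepage_endio()
now folds the sub-page split counter and the EIO flag into the single atomic
word kept in page->private, retiring the separate z_erofs_page_mark_eio()
pass. The same scheme in miniature with C11 atomics; endio() and PAGE_EIO are
stand-in names, not kernel symbols:

    #include <stdatomic.h>
    #include <stdbool.h>

    #define PAGE_EIO (1u << 31)           /* stands in for Z_EROFS_PAGE_EIO */

    /* returns true when this caller dropped the last sub-page reference */
    static bool endio(atomic_uint *priv, int err, bool *eio)
    {
        unsigned int orig, v;

        do {
            orig = atomic_load(priv);
            v = (orig - 1) | (err ? PAGE_EIO : 0);   /* decrement + sticky error */
        } while (!atomic_compare_exchange_weak(priv, &orig, v));

        if (v & ~PAGE_EIO)                /* other sub-page users still pending */
            return false;
        *eio = v & PAGE_EIO;              /* any error seen along the way */
        return true;
    }
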
index 1909dda..7b55111 100644 (file)
@@ -561,8 +561,9 @@ static int z_erofs_do_map_blocks(struct inode *inode,
 
        if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
            ((flags & EROFS_GET_BLOCKS_READMORE) &&
-            map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
-            map->m_llen >= i_blocksize(inode))) {
+            (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
+             map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE) &&
+             map->m_llen >= i_blocksize(inode))) {
                err = z_erofs_get_extent_decompressedlen(&m);
                if (!err)
                        map->m_flags |= EROFS_MAP_FULL_MAPPED;
index 8aa36cd..33a918f 100644 (file)
@@ -189,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
 {
        lockdep_assert_held(&ctx->wqh.lock);
 
-       *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+       *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
        ctx->count -= *cnt;
 }
 EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
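
Context for the eventfd one-liner: with EFD_SEMAPHORE, every successful read
reports 1 and decrements the count, but eventfd_ctx_do_read() is also reached
from kernel-internal paths (e.g. eventfd_ctx_remove_wait_queue()) that may
call it with a zero count, which previously yielded *cnt = 1 and underflowed
ctx->count. The user-space view of the semaphore semantics, as a minimal
Linux-only demo:

    #include <stdio.h>
    #include <stdint.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    int main(void)
    {
        uint64_t v;
        int fd = eventfd(3, EFD_SEMAPHORE | EFD_NONBLOCK);

        if (fd < 0)
            return 1;
        /* semaphore mode: each read returns 1 and decrements the count */
        while (read(fd, &v, sizeof(v)) == sizeof(v))
            printf("read %llu\n", (unsigned long long)v);   /* 1, three times */
        /* the fourth read fails with EAGAIN instead of reporting a bogus 1 */
        close(fd);
        return 0;
    }
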
index 4b1b336..1d9a71a 100644 (file)
@@ -975,15 +975,11 @@ again:
 
 static int ep_alloc(struct eventpoll **pep)
 {
-       int error;
-       struct user_struct *user;
        struct eventpoll *ep;
 
-       user = get_current_user();
-       error = -ENOMEM;
        ep = kzalloc(sizeof(*ep), GFP_KERNEL);
        if (unlikely(!ep))
-               goto free_uid;
+               return -ENOMEM;
 
        mutex_init(&ep->mtx);
        rwlock_init(&ep->lock);
@@ -992,16 +988,12 @@ static int ep_alloc(struct eventpoll **pep)
        INIT_LIST_HEAD(&ep->rdllist);
        ep->rbr = RB_ROOT_CACHED;
        ep->ovflist = EP_UNACTIVE_PTR;
-       ep->user = user;
+       ep->user = get_current_user();
        refcount_set(&ep->refcount, 1);
 
        *pep = ep;
 
        return 0;
-
-free_uid:
-       free_uid(user);
-       return error;
 }
 
 /*
index 9f42f25..e918dec 100644 (file)
@@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
        }
        sbi->map_sectors = ((need_map_size - 1) >>
                        (sb->s_blocksize_bits)) + 1;
-       sbi->vol_amap = kmalloc_array(sbi->map_sectors,
+       sbi->vol_amap = kvmalloc_array(sbi->map_sectors,
                                sizeof(struct buffer_head *), GFP_KERNEL);
        if (!sbi->vol_amap)
                return -ENOMEM;
@@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
                        while (j < i)
                                brelse(sbi->vol_amap[j++]);
 
-                       kfree(sbi->vol_amap);
+                       kvfree(sbi->vol_amap);
                        sbi->vol_amap = NULL;
                        return -EIO;
                }
@@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi)
        for (i = 0; i < sbi->map_sectors; i++)
                __brelse(sbi->vol_amap[i]);
 
-       kfree(sbi->vol_amap);
+       kvfree(sbi->vol_amap);
 }
 
 int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
index 9575741..e1586bb 100644 (file)
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
 {
        int i, err;
        struct exfat_entry_set_cache es;
+       unsigned int uni_len = 0, len;
 
        err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
        if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
                if (exfat_get_entry_type(ep) != TYPE_EXTEND)
                        break;
 
-               exfat_extract_uni_name(ep, uniname);
+               len = exfat_extract_uni_name(ep, uniname);
+               uni_len += len;
+               if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+                       break;
                uniname += EXFAT_FILE_NAME_LEN;
        }
 
@@ -214,7 +218,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb)
        exfat_init_namebuf(nb);
 }
 
-/* skip iterating emit_dots when dir is empty */
+/*
+ * Before calling dir_emit*(), sbi->s_lock should be released
+ * because page fault can occur in dir_emit*().
+ */
 #define ITER_POS_FILLED_DOTS    (2)
 static int exfat_iterate(struct file *file, struct dir_context *ctx)
 {
@@ -229,11 +236,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
        int err = 0, fake_offset = 0;
 
        exfat_init_namebuf(nb);
-       mutex_lock(&EXFAT_SB(sb)->s_lock);
 
        cpos = ctx->pos;
        if (!dir_emit_dots(file, ctx))
-               goto unlock;
+               goto out;
 
        if (ctx->pos == ITER_POS_FILLED_DOTS) {
                cpos = 0;
@@ -245,16 +251,18 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
        /* name buffer should be allocated before use */
        err = exfat_alloc_namebuf(nb);
        if (err)
-               goto unlock;
+               goto out;
 get_new:
+       mutex_lock(&EXFAT_SB(sb)->s_lock);
+
        if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode))
                goto end_of_dir;
 
        err = exfat_readdir(inode, &cpos, &de);
        if (err) {
                /*
-                * At least we tried to read a sector.  Move cpos to next sector
-                * position (should be aligned).
+                * At least we tried to read a sector.
+                * Move cpos to next sector position (should be aligned).
                 */
                if (err == -EIO) {
                        cpos += 1 << (sb->s_blocksize_bits);
@@ -277,16 +285,10 @@ get_new:
                inum = iunique(sb, EXFAT_ROOT_INO);
        }
 
-       /*
-        * Before calling dir_emit(), sb_lock should be released.
-        * Because page fault can occur in dir_emit() when the size
-        * of buffer given from user is larger than one page size.
-        */
        mutex_unlock(&EXFAT_SB(sb)->s_lock);
        if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
                        (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
-               goto out_unlocked;
-       mutex_lock(&EXFAT_SB(sb)->s_lock);
+               goto out;
        ctx->pos = cpos;
        goto get_new;
 
@@ -294,9 +296,8 @@ end_of_dir:
        if (!cpos && fake_offset)
                cpos = ITER_POS_FILLED_DOTS;
        ctx->pos = cpos;
-unlock:
        mutex_unlock(&EXFAT_SB(sb)->s_lock);
-out_unlocked:
+out:
        /*
         * To improve performance, free namebuf after unlocking sb_lock.
         * If namebuf is not allocated, this function does nothing.
@@ -305,10 +306,11 @@ out_unlocked:
        return err;
 }
 
+WRAP_DIR_ITER(exfat_iterate) // FIXME!
 const struct file_operations exfat_dir_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
-       .iterate        = exfat_iterate,
+       .iterate_shared = shared_exfat_iterate,
        .unlocked_ioctl = exfat_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl = exfat_compat_ioctl,
@@ -1079,7 +1081,8 @@ rewind:
                        if (entry_type == TYPE_EXTEND) {
                                unsigned short entry_uniname[16], unichar;
 
-                               if (step != DIRENT_STEP_NAME) {
+                               if (step != DIRENT_STEP_NAME ||
+                                   name_len >= MAX_NAME_LENGTH) {
                                        step = DIRENT_STEP_FILE;
                                        continue;
                                }
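
The exfat_iterate() reshuffle above has one goal: never hold sbi->s_lock
across dir_emit*(), which can fault on the user buffer (and which now runs
under ->iterate_shared through the WRAP_DIR_ITER() adapter). The shape of
that fix in miniature, as a generic pthreads sketch; struct dir_iter, emit_fn
and iterate() are invented for illustration:

    #include <pthread.h>
    #include <stdbool.h>

    struct dir_iter {
        pthread_mutex_t lock;             /* stands in for sbi->s_lock */
        long cursor, nentries;
    };

    typedef bool (*emit_fn)(void *ctx, long entry);   /* may block or fault */

    static void iterate(struct dir_iter *it, emit_fn emit, void *ctx)
    {
        for (;;) {
            long entry;

            pthread_mutex_lock(&it->lock);
            if (it->cursor >= it->nentries) {
                pthread_mutex_unlock(&it->lock);
                return;
            }
            entry = it->cursor++;
            pthread_mutex_unlock(&it->lock);   /* drop before the callback */

            if (!emit(ctx, entry))             /* like dir_emit() */
                return;
        }
    }
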
index 729ada9..f55498e 100644 (file)
@@ -273,8 +273,6 @@ struct exfat_sb_info {
 
        spinlock_t inode_hash_lock;
        struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
-
-       struct rcu_head rcu;
 };
 
 #define EXFAT_CACHE_VALID      0
index 3cbd270..32395ef 100644 (file)
@@ -22,7 +22,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
        if (err)
                return err;
 
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        if (!IS_SYNC(inode))
@@ -232,7 +232,7 @@ int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
        struct inode *inode = d_backing_inode(path->dentry);
        struct exfat_inode_info *ei = EXFAT_I(inode);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        exfat_truncate_atime(&stat->atime);
        stat->result_mask |= STATX_BTIME;
        stat->btime.tv_sec = ei->i_crtime.tv_sec;
@@ -290,7 +290,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
        }
 
        if (attr->ia_valid & ATTR_SIZE)
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
 
        setattr_copy(&nop_mnt_idmap, inode, attr);
        exfat_truncate_atime(&inode->i_atime);
index 481dd33..13329ba 100644 (file)
@@ -355,7 +355,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to)
 
        if (to > i_size_read(inode)) {
                truncate_pagecache(inode, i_size_read(inode));
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                exfat_truncate(inode);
        }
 }
@@ -398,7 +398,7 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
                exfat_write_failed(mapping, pos+len);
 
        if (!(err < 0) && !(ei->attr & ATTR_ARCHIVE)) {
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                ei->attr |= ATTR_ARCHIVE;
                mark_inode_dirty(inode);
        }
@@ -577,7 +577,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
 
        inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
        inode->i_mtime = info->mtime;
-       inode->i_ctime = info->mtime;
+       inode_set_ctime_to_ts(inode, info->mtime);
        ei->i_crtime = info->crtime;
        inode->i_atime = info->atime;
 
index e0ff9d1..1b9f587 100644 (file)
@@ -569,7 +569,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
                goto unlock;
 
        inode_inc_iversion(dir);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        if (IS_DIRSYNC(dir))
                exfat_sync_inode(dir);
        else
@@ -582,8 +582,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
                goto unlock;
 
        inode_inc_iversion(inode);
-       inode->i_mtime = inode->i_atime = inode->i_ctime =
-               EXFAT_I(inode)->i_crtime = current_time(inode);
+       inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode);
        exfat_truncate_atime(&inode->i_atime);
        /* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
@@ -817,7 +816,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
        ei->dir.dir = DIR_DELETED;
 
        inode_inc_iversion(dir);
-       dir->i_mtime = dir->i_atime = current_time(dir);
+       dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
        exfat_truncate_atime(&dir->i_atime);
        if (IS_DIRSYNC(dir))
                exfat_sync_inode(dir);
@@ -825,7 +824,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
                mark_inode_dirty(dir);
 
        clear_nlink(inode);
-       inode->i_mtime = inode->i_atime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        exfat_truncate_atime(&inode->i_atime);
        exfat_unhash_inode(inode);
        exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -852,7 +851,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                goto unlock;
 
        inode_inc_iversion(dir);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        if (IS_DIRSYNC(dir))
                exfat_sync_inode(dir);
        else
@@ -866,8 +865,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
                goto unlock;
 
        inode_inc_iversion(inode);
-       inode->i_mtime = inode->i_atime = inode->i_ctime =
-               EXFAT_I(inode)->i_crtime = current_time(inode);
+       inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode);
        exfat_truncate_atime(&inode->i_atime);
        /* timestamp is already written, so mark_inode_dirty() is unneeded. */
 
@@ -979,7 +977,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
        ei->dir.dir = DIR_DELETED;
 
        inode_inc_iversion(dir);
-       dir->i_mtime = dir->i_atime = current_time(dir);
+       dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
        exfat_truncate_atime(&dir->i_atime);
        if (IS_DIRSYNC(dir))
                exfat_sync_inode(dir);
@@ -988,7 +986,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
        drop_nlink(dir);
 
        clear_nlink(inode);
-       inode->i_mtime = inode->i_atime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        exfat_truncate_atime(&inode->i_atime);
        exfat_unhash_inode(inode);
        exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -1312,8 +1310,8 @@ static int exfat_rename(struct mnt_idmap *idmap,
                goto unlock;
 
        inode_inc_iversion(new_dir);
-       new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime =
-               EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+       EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
        exfat_truncate_atime(&new_dir->i_atime);
        if (IS_DIRSYNC(new_dir))
                exfat_sync_inode(new_dir);
@@ -1336,7 +1334,6 @@ static int exfat_rename(struct mnt_idmap *idmap,
        }
 
        inode_inc_iversion(old_dir);
-       old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
        if (IS_DIRSYNC(old_dir))
                exfat_sync_inode(old_dir);
        else
@@ -1354,8 +1351,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
                        exfat_warn(sb, "abnormal access to an inode dropped");
                        WARN_ON(new_inode->i_nlink == 0);
                }
-               new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime =
-                       current_time(new_inode);
+               EXFAT_I(new_inode)->i_crtime = current_time(new_inode);
        }
 
 unlock:
index 8c32460..2778bd9 100644 (file)
@@ -31,16 +31,6 @@ static void exfat_free_iocharset(struct exfat_sb_info *sbi)
                kfree(sbi->options.iocharset);
 }
 
-static void exfat_delayed_free(struct rcu_head *p)
-{
-       struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
-
-       unload_nls(sbi->nls_io);
-       exfat_free_iocharset(sbi);
-       exfat_free_upcase_table(sbi);
-       kfree(sbi);
-}
-
 static void exfat_put_super(struct super_block *sb)
 {
        struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -50,7 +40,8 @@ static void exfat_put_super(struct super_block *sb)
        brelse(sbi->boot_bh);
        mutex_unlock(&sbi->s_lock);
 
-       call_rcu(&sbi->rcu, exfat_delayed_free);
+       unload_nls(sbi->nls_io);
+       exfat_free_upcase_table(sbi);
 }
 
 static int exfat_sync_fs(struct super_block *sb, int wait)
@@ -379,8 +370,7 @@ static int exfat_read_root(struct inode *inode)
        ei->i_size_ondisk = i_size_read(inode);
 
        exfat_save_attr(inode, ATTR_SUBDIR);
-       inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
-               current_time(inode);
+       inode->i_mtime = inode->i_atime = ei->i_crtime = inode_set_ctime_current(inode);
        exfat_truncate_atime(&inode->i_atime);
        return 0;
 }
@@ -710,9 +700,6 @@ free_table:
 
 check_nls_io:
        unload_nls(sbi->nls_io);
-       exfat_free_iocharset(sbi);
-       sb->s_fs_info = NULL;
-       kfree(sbi);
        return err;
 }
 
@@ -721,14 +708,18 @@ static int exfat_get_tree(struct fs_context *fc)
        return get_tree_bdev(fc, exfat_fill_super);
 }
 
+static void exfat_free_sbi(struct exfat_sb_info *sbi)
+{
+       exfat_free_iocharset(sbi);
+       kfree(sbi);
+}
+
 static void exfat_free(struct fs_context *fc)
 {
        struct exfat_sb_info *sbi = fc->s_fs_info;
 
-       if (sbi) {
-               exfat_free_iocharset(sbi);
-               kfree(sbi);
-       }
+       if (sbi)
+               exfat_free_sbi(sbi);
 }
 
 static int exfat_reconfigure(struct fs_context *fc)
@@ -773,12 +764,21 @@ static int exfat_init_fs_context(struct fs_context *fc)
        return 0;
 }
 
+static void exfat_kill_sb(struct super_block *sb)
+{
+       struct exfat_sb_info *sbi = sb->s_fs_info;
+
+       kill_block_super(sb);
+       if (sbi)
+               exfat_free_sbi(sbi);
+}
+
 static struct file_system_type exfat_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "exfat",
        .init_fs_context        = exfat_init_fs_context,
        .parameters             = exfat_parameters,
-       .kill_sb                = kill_block_super,
+       .kill_sb                = exfat_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
 };
 
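With teardown moved into ->kill_sb, the call_rcu()-based exfat_delayed_free() above becomes unnecessary: once kill_block_super() has returned, nothing can still dereference sb->s_fs_info, so a plain kfree() is safe. The general shape of the pattern, with foo_* as placeholder names rather than code from this series:

        static void foo_kill_sb(struct super_block *sb)
        {
                struct foo_sb_info *sbi = sb->s_fs_info; /* grab before teardown */

                kill_block_super(sb);   /* no user of s_fs_info survives this */
                kfree(sbi);             /* hence no RCU grace period needed */
        }

The same split shows up below for ext4, which uses its ->kill_sb to drop the external journal device reference after the generic teardown.
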
index 40e624c..d1dbe47 100644 (file)
@@ -315,7 +315,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
                goto out;
 
        error = -EINVAL;
-       if (!file->f_op->iterate && !file->f_op->iterate_shared)
+       if (!file->f_op->iterate_shared)
                goto out_close;
 
        buffer.sequence = 0;
index 82b17d7..7e54c31 100644 (file)
@@ -237,7 +237,7 @@ ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
        error = __ext2_set_acl(inode, acl, type);
        if (!error && update_mode) {
                inode->i_mode = mode;
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                mark_inode_dirty(inode);
        }
        return error;
index 42db804..b335f17 100644 (file)
@@ -468,7 +468,7 @@ int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
        ext2_set_de_type(de, inode);
        ext2_commit_chunk(page, pos, len);
        if (update_times)
-               dir->i_mtime = dir->i_ctime = current_time(dir);
+               dir->i_mtime = inode_set_ctime_current(dir);
        EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
        mark_inode_dirty(dir);
        return ext2_handle_dirsync(dir);
@@ -555,7 +555,7 @@ got_it:
        de->inode = cpu_to_le32(inode->i_ino);
        ext2_set_de_type (de, inode);
        ext2_commit_chunk(page, pos, rec_len);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
        mark_inode_dirty(dir);
        err = ext2_handle_dirsync(dir);
@@ -606,7 +606,7 @@ int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page)
                pde->rec_len = ext2_rec_len_to_disk(to - from);
        dir->inode = 0;
        ext2_commit_chunk(page, pos, to - from);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
        mark_inode_dirty(inode);
        return ext2_handle_dirsync(inode);
index a4e1d7a..124df89 100644 (file)
@@ -549,7 +549,7 @@ got:
 
        inode->i_ino = ino;
        inode->i_blocks = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        memset(ei->i_data, 0, sizeof(ei->i_data));
        ei->i_flags =
                ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
index 7598321..acbab27 100644 (file)
@@ -595,7 +595,7 @@ static void ext2_splice_branch(struct inode *inode,
        if (where->bh)
                mark_buffer_dirty_inode(where->bh, inode);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 }
 
@@ -1287,7 +1287,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
        __ext2_truncate_blocks(inode, newsize);
        filemap_invalidate_unlock(inode->i_mapping);
 
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        if (inode_needs_sync(inode)) {
                sync_mapping_buffers(inode->i_mapping);
                sync_inode_metadata(inode, 1);
@@ -1409,9 +1409,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
        inode->i_size = le32_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
-       inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
+       inode_set_ctime(inode, (signed)le32_to_cpu(raw_inode->i_ctime), 0);
        inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
-       inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+       inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
        ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
        /* We now have enough fields to check if the inode was active or not.
         * This is needed because nfsd might try to access dead inodes
@@ -1541,7 +1541,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
        raw_inode->i_size = cpu_to_le32(inode->i_size);
        raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
-       raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+       raw_inode->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec);
        raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
 
        raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
@@ -1628,7 +1628,7 @@ int ext2_getattr(struct mnt_idmap *idmap, const struct path *path,
                        STATX_ATTR_IMMUTABLE |
                        STATX_ATTR_NODUMP);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        return 0;
 }
 
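This getattr hunk, like the ext4 and f2fs ones further down, follows a generic_fillattr() signature change: the statx request mask is now passed in so the helper can see which fields the caller actually asked for. A minimal ->getattr sketch under the new signature (the foo_ name is a placeholder, not code from this series):

        static int foo_getattr(struct mnt_idmap *idmap, const struct path *path,
                               struct kstat *stat, u32 request_mask,
                               unsigned int query_flags)
        {
                struct inode *inode = d_inode(path->dentry);

                /* request_mask carries the STATX_* bits from userspace */
                generic_fillattr(idmap, request_mask, inode, stat);
                return 0;
        }
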
index cc87d41..44e0448 100644 (file)
@@ -44,7 +44,7 @@ int ext2_fileattr_set(struct mnt_idmap *idmap,
                (fa->flags & EXT2_FL_USER_MODIFIABLE);
 
        ext2_set_inode_flags(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        return 0;
@@ -77,7 +77,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                }
 
                inode_lock(inode);
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                inode->i_generation = generation;
                inode_unlock(inode);
 
index 937dd8f..0595170 100644 (file)
@@ -211,7 +211,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
        if (err)
                return err;
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_link_count(inode);
        ihold(inode);
 
@@ -291,7 +291,7 @@ static int ext2_unlink(struct inode *dir, struct dentry *dentry)
        if (err)
                goto out;
 
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
        inode_dec_link_count(inode);
        err = 0;
 out:
@@ -367,7 +367,7 @@ static int ext2_rename (struct mnt_idmap * idmap,
                ext2_put_page(new_page, new_de);
                if (err)
                        goto out_dir;
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                if (dir_de)
                        drop_nlink(new_inode);
                inode_dec_link_count(new_inode);
@@ -383,7 +383,7 @@ static int ext2_rename (struct mnt_idmap * idmap,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
        mark_inode_dirty(old_inode);
 
        err = ext2_delete_entry(old_de, old_page);
index 2959afc..aaf3e3e 100644 (file)
@@ -1572,7 +1572,7 @@ out:
        if (inode->i_size < off+len-towrite)
                i_size_write(inode, off+len-towrite);
        inode_inc_iversion(inode);
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        return len - towrite;
 }
index 8906ba4..1c91871 100644 (file)
@@ -773,7 +773,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 
        /* Update the inode. */
        EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        if (IS_SYNC(inode)) {
                error = sync_inode_metadata(inode, 1);
                /* In case sync failed due to ENOSPC the inode was actually
index 27fcbdd..3bffe86 100644 (file)
@@ -259,7 +259,7 @@ retry:
        error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
        if (!error && update_mode) {
                inode->i_mode = mode;
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                error = ext4_mark_inode_dirty(handle, inode);
        }
 out_stop:
index 0a2d55f..1e2259d 100644 (file)
@@ -868,64 +868,70 @@ struct ext4_inode {
  * affected filesystem before 2242.
  */
 
-static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 ts)
 {
-       u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
-       return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
+       u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK;
+       return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS));
 }
 
-static inline void ext4_decode_extra_time(struct timespec64 *time,
-                                         __le32 extra)
+static inline struct timespec64 ext4_decode_extra_time(__le32 base,
+                                                      __le32 extra)
 {
+       struct timespec64 ts = { .tv_sec = (signed)le32_to_cpu(base) };
+
        if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
-               time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
-       time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+               ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+       ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+       return ts;
 }
 
-#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)                          \
+#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts)                  \
 do {                                                                           \
-       if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra))     {\
-               (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec);        \
-               (raw_inode)->xtime ## _extra =                                  \
-                               ext4_encode_extra_time(&(inode)->xtime);        \
-               }                                                               \
-       else    \
-               (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec, S32_MIN, S32_MAX));    \
+       if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {    \
+               (raw_inode)->xtime = cpu_to_le32((ts).tv_sec);                  \
+               (raw_inode)->xtime ## _extra = ext4_encode_extra_time(ts);      \
+       } else                                                                  \
+               (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX));      \
 } while (0)
 
-#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)                               \
-do {                                                                          \
-       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                      \
-               (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec);      \
-       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))            \
-               (raw_inode)->xtime ## _extra =                                 \
-                               ext4_encode_extra_time(&(einode)->xtime);      \
-} while (0)
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode)                          \
+       EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime)
+
+#define EXT4_INODE_SET_CTIME(inode, raw_inode)                                 \
+       EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode))
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode)                                \
+       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                       \
+               EXT4_INODE_SET_XTIME_VAL(xtime, &((einode)->vfs_inode),         \
+                                        raw_inode, (einode)->xtime)
+
+#define EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode)                      \
+       (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra) ?        \
+               ext4_decode_extra_time((raw_inode)->xtime,                              \
+                                      (raw_inode)->xtime ## _extra) :          \
+               (struct timespec64) {                                           \
+                       .tv_sec = (signed)le32_to_cpu((raw_inode)->xtime)       \
+               })
 
 #define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode)                          \
 do {                                                                           \
-       (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime);        \
-       if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {    \
-               ext4_decode_extra_time(&(inode)->xtime,                         \
-                                      raw_inode->xtime ## _extra);             \
-               }                                                               \
-       else                                                                    \
-               (inode)->xtime.tv_nsec = 0;                                     \
+       (inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode);     \
 } while (0)
 
+#define EXT4_INODE_GET_CTIME(inode, raw_inode)                                 \
+do {                                                                           \
+       inode_set_ctime_to_ts(inode,                                            \
+               EXT4_INODE_GET_XTIME_VAL(i_ctime, inode, raw_inode));           \
+} while (0)
 
-#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)                               \
-do {                                                                          \
-       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                      \
-               (einode)->xtime.tv_sec =                                       \
-                       (signed)le32_to_cpu((raw_inode)->xtime);               \
-       else                                                                   \
-               (einode)->xtime.tv_sec = 0;                                    \
-       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra))            \
-               ext4_decode_extra_time(&(einode)->xtime,                       \
-                                      raw_inode->xtime ## _extra);            \
-       else                                                                   \
-               (einode)->xtime.tv_nsec = 0;                                   \
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode)                                \
+do {                                                                           \
+       if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime))                       \
+               (einode)->xtime =                                               \
+                       EXT4_INODE_GET_XTIME_VAL(xtime, &(einode->vfs_inode),   \
+                                                raw_inode);                    \
+       else                                                                    \
+               (einode)->xtime = (struct timespec64){0, 0};                    \
 } while (0)
 
 #define i_disk_version osd1.linux1.l_i_version
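
The refactor above changes only the plumbing; the on-disk encoding is unchanged: the 32-bit "extra" word keeps the epoch-extension bits (bits 32-33 of tv_sec) in its low EXT4_EPOCH_BITS and the nanoseconds in the rest. A worked example, assuming EXT4_EPOCH_BITS == 2:

        /* a timestamp just past the 2038 rollover, with tv_nsec = 5 */
        struct timespec64 ts = { .tv_sec = 0x100000000LL, .tv_nsec = 5 };

        /* epoch bits: (0x100000000 >> 32) & 3 == 1
         * extra:      1 | (5 << 2)          == 0x15 */
        __le32 extra = ext4_encode_extra_time(ts);      /* cpu_to_le32(0x15) */

        /* decoding reverses it; base carries the low 32 bits of tv_sec */
        struct timespec64 back = ext4_decode_extra_time(cpu_to_le32(0), extra);
        /* back.tv_sec == 0x100000000, back.tv_nsec == 5 */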
index 77f318e..b38d595 100644 (file)
@@ -234,8 +234,7 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
 
        might_sleep();
 
-       if (bh->b_bdev->bd_super)
-               ext4_check_bdev_write_error(bh->b_bdev->bd_super);
+       ext4_check_bdev_write_error(sb);
 
        if (ext4_handle_valid(handle)) {
                err = jbd2_journal_get_write_access(handle, bh);
index e4115d3..202c769 100644 (file)
@@ -4476,12 +4476,12 @@ retry:
                map.m_lblk += ret;
                map.m_len = len = len - ret;
                epos = (loff_t)map.m_lblk << inode->i_blkbits;
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                if (new_size) {
                        if (epos > new_size)
                                epos = new_size;
                        if (ext4_update_inode_size(inode, epos) & 0x1)
-                               inode->i_mtime = inode->i_ctime;
+                               inode->i_mtime = inode_get_ctime(inode);
                }
                ret2 = ext4_mark_inode_dirty(handle, inode);
                ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4617,7 +4617,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 
                /* Now release the pages and zero block aligned part of pages */
                truncate_pagecache_range(inode, start, end - 1);
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
 
                ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
                                             flags);
@@ -4642,7 +4642,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
                goto out_mutex;
        }
 
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        if (new_size)
                ext4_update_inode_size(inode, new_size);
        ret = ext4_mark_inode_dirty(handle, inode);
@@ -5378,7 +5378,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        ret = ext4_mark_inode_dirty(handle, inode);
        ext4_update_inode_fsync_trans(handle, inode, 1);
 
@@ -5488,7 +5488,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
        /* Expand file to avoid data loss if there is error while shifting */
        inode->i_size += len;
        EXT4_I(inode)->i_disksize += len;
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        ret = ext4_mark_inode_dirty(handle, inode);
        if (ret)
                goto out_stop;
index 754f961..48abef5 100644 (file)
@@ -1250,7 +1250,7 @@ got:
        inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
        /* This is the optimal IO size (for stat), not the fs block size */
        inode->i_blocks = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        ei->i_crtime = inode->i_mtime;
 
        memset(ei->i_data, 0, sizeof(ei->i_data));
index a4b7e4b..0038610 100644 (file)
@@ -1037,7 +1037,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
         * happen is that the times are slightly out of date
         * and/or different from the directory change time.
         */
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        ext4_update_dx_flag(dir);
        inode_inc_iversion(dir);
        return 1;
@@ -1991,7 +1991,7 @@ out:
                ext4_orphan_del(handle, inode);
 
        if (err == 0) {
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                err = ext4_mark_inode_dirty(handle, inode);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
index 7935ea6..f0c0fd5 100644 (file)
@@ -245,9 +245,9 @@ static void inode_test_xtimestamp_decoding(struct kunit *test)
        struct timestamp_expectation *test_param =
                        (struct timestamp_expectation *)(test->param_value);
 
-       timestamp.tv_sec = get_32bit_time(test_param);
-       ext4_decode_extra_time(&timestamp,
-                              cpu_to_le32(test_param->extra_bits));
+       timestamp = ext4_decode_extra_time(
+                               cpu_to_le32(get_32bit_time(test_param)),
+                               cpu_to_le32(test_param->extra_bits));
 
        KUNIT_EXPECT_EQ_MSG(test,
                            test_param->expected.tv_sec,
index 43775a6..6683076 100644 (file)
@@ -3986,7 +3986,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
 
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        ret2 = ext4_mark_inode_dirty(handle, inode);
        if (unlikely(ret2))
                ret = ret2;
@@ -4146,7 +4146,7 @@ out_stop:
        if (inode->i_nlink)
                ext4_orphan_del(handle, inode);
 
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        err2 = ext4_mark_inode_dirty(handle, inode);
        if (unlikely(err2 && !err))
                err = err2;
@@ -4249,7 +4249,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
        }
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
 
-       EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+       EXT4_INODE_SET_CTIME(inode, raw_inode);
        EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
        EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
@@ -4858,7 +4858,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
                }
        }
 
-       EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+       EXT4_INODE_GET_CTIME(inode, raw_inode);
        EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
        EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
        EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
@@ -4981,7 +4981,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
                spin_unlock(&inode->i_lock);
 
                spin_lock(&ei->i_raw_lock);
-               EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+               EXT4_INODE_SET_CTIME(inode, raw_inode);
                EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
                EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
                ext4_inode_csum_set(inode, raw_inode, ei);
@@ -5376,10 +5376,8 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
                         * Update c/mtime on truncate up, ext4_truncate() will
                         * update c/mtime in shrink case below
                         */
-                       if (!shrink) {
-                               inode->i_mtime = current_time(inode);
-                               inode->i_ctime = inode->i_mtime;
-                       }
+                       if (!shrink)
+                               inode->i_mtime = inode_set_ctime_current(inode);
 
                        if (shrink)
                                ext4_fc_track_range(handle, inode,
@@ -5537,7 +5535,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
                                  STATX_ATTR_NODUMP |
                                  STATX_ATTR_VERITY);
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
        return 0;
 }
 
index 3318595..b0349f4 100644 (file)
@@ -449,7 +449,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
        diff = size - size_bl;
        swap_inode_data(inode, inode_bl);
 
-       inode->i_ctime = inode_bl->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
+       inode_set_ctime_current(inode_bl);
        inode_inc_iversion(inode);
 
        inode->i_generation = get_random_u32();
@@ -663,7 +664,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
 
        ext4_set_inode_flags(inode, false);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_iversion(inode);
 
        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -774,7 +775,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
        }
 
        EXT4_I(inode)->i_projid = kprojid;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_iversion(inode);
 out_dirty:
        rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -1266,7 +1267,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                }
                err = ext4_reserve_inode_write(handle, inode, &iloc);
                if (err == 0) {
-                       inode->i_ctime = current_time(inode);
+                       inode_set_ctime_current(inode);
                        inode_inc_iversion(inode);
                        inode->i_generation = generation;
                        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
index a2475b8..21b903f 100644 (file)
@@ -1006,14 +1006,11 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
         * fls() instead since we need to know the actual length while modifying
         * goal length.
         */
-       order = fls(ac->ac_g_ex.fe_len);
+       order = fls(ac->ac_g_ex.fe_len) - 1;
        min_order = order - sbi->s_mb_best_avail_max_trim_order;
        if (min_order < 0)
                min_order = 0;
 
-       if (1 << min_order < ac->ac_o_ex.fe_len)
-               min_order = fls(ac->ac_o_ex.fe_len) + 1;
-
        if (sbi->s_stripe > 0) {
                /*
                 * We are assuming that stripe size is always a multiple of
@@ -1021,9 +1018,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
                 */
                num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe);
                if (1 << min_order < num_stripe_clusters)
-                       min_order = fls(num_stripe_clusters);
+                       /*
+                        * We consider one order less because later we round
+                        * up the goal len to num_stripe_clusters.
+                        */
+                       min_order = fls(num_stripe_clusters) - 1;
        }
 
+       if (1 << min_order < ac->ac_o_ex.fe_len)
+               min_order = fls(ac->ac_o_ex.fe_len);
+
        for (i = order; i >= min_order; i--) {
                int frag_order;
                /*
@@ -4761,8 +4765,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        int order, i;
        struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
        struct ext4_locality_group *lg;
-       struct ext4_prealloc_space *tmp_pa, *cpa = NULL;
-       ext4_lblk_t tmp_pa_start, tmp_pa_end;
+       struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
+       loff_t tmp_pa_end;
        struct rb_node *iter;
        ext4_fsblk_t goal_block;
 
@@ -4770,47 +4774,151 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
        if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
                return false;
 
-       /* first, try per-file preallocation */
+       /*
+        * first, try per-file preallocation by searching the inode pa rbtree.
+        *
+        * Here, we can't do a direct traversal of the tree because
+        * ext4_mb_discard_group_preallocation() can concurrently mark the pa
+        * deleted and that can cause direct traversal to skip some entries.
+        */
        read_lock(&ei->i_prealloc_lock);
+
+       if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
+               goto try_group_pa;
+       }
+
+       /*
+        * Step 1: Find a pa with logical start immediately adjacent to the
+        * original logical start. This could be on the left or right.
+        *
+        * (tmp_pa->pa_lstart never changes so we can skip locking for it).
+        */
        for (iter = ei->i_prealloc_node.rb_node; iter;
             iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
-                                           tmp_pa_start, iter)) {
+                                           tmp_pa->pa_lstart, iter)) {
                tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
                                  pa_node.inode_node);
+       }
 
-               /* all fields in this condition don't change,
-                * so we can skip locking for them */
-               tmp_pa_start = tmp_pa->pa_lstart;
-               tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
-
-               /* original request start doesn't lie in this PA */
-               if (ac->ac_o_ex.fe_logical < tmp_pa_start ||
-                   ac->ac_o_ex.fe_logical >= tmp_pa_end)
-                       continue;
+       /*
+        * Step 2: The adjacent pa might be to the right of logical start, find
+        * the left adjacent pa. After this step we'd have a valid tmp_pa whose
+        * logical start is towards the left of the original request's logical start.
+        */
+       if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
+               struct rb_node *tmp;
+               tmp = rb_prev(&tmp_pa->pa_node.inode_node);
 
-               /* non-extent files can't have physical blocks past 2^32 */
-               if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
-                   (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
-                    EXT4_MAX_BLOCK_FILE_PHYS)) {
+               if (tmp) {
+                       tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
+                                           pa_node.inode_node);
+               } else {
                        /*
-                        * Since PAs don't overlap, we won't find any
-                        * other PA to satisfy this.
+                        * If there is no adjacent pa to the left, then finding
+                        * an overlapping pa is not possible, so stop searching
+                        * the inode pa tree.
                         */
-                       break;
+                       goto try_group_pa;
                }
+       }
+
+       BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
 
-               /* found preallocated blocks, use them */
+       /*
+        * Step 3: If the left adjacent pa is deleted, keep moving left to find
+        * the first non-deleted adjacent pa. After this step we should have a
+        * valid tmp_pa which is guaranteed to be non-deleted.
+        */
+       for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
+               if (!iter) {
+                       /*
+                        * no non-deleted left adjacent pa, so stop searching
+                        * the inode pa tree
+                        */
+                       goto try_group_pa;
+               }
+               tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
+                                 pa_node.inode_node);
                spin_lock(&tmp_pa->pa_lock);
-               if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free &&
-                   likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
-                       atomic_inc(&tmp_pa->pa_count);
-                       ext4_mb_use_inode_pa(ac, tmp_pa);
+               if (tmp_pa->pa_deleted == 0) {
+                       /*
+                        * We will keep holding the pa_lock from
+                        * this point on because we don't want group discard
+                        * to delete this pa underneath us. Since group
+                        * discard is an ENOSPC operation anyway, it
+                        * should be okay for it to wait a few more cycles.
+                        */
+                       break;
+               } else {
                        spin_unlock(&tmp_pa->pa_lock);
-                       read_unlock(&ei->i_prealloc_lock);
-                       return true;
                }
+       }
+
+       BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
+       BUG_ON(tmp_pa->pa_deleted == 1);
+
+       /*
+        * Step 4: We now have the non-deleted left adjacent pa. Only this
+        * pa can possibly satisfy the request, so check whether it overlaps
+        * the original logical start and stop searching if it doesn't.
+        */
+       tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
+
+       if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
                spin_unlock(&tmp_pa->pa_lock);
+               goto try_group_pa;
+       }
+
+       /* non-extent files can't have physical blocks past 2^32 */
+       if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
+           (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
+            EXT4_MAX_BLOCK_FILE_PHYS)) {
+               /*
+                * Since PAs don't overlap, we won't find any other PA to
+                * satisfy this.
+                */
+               spin_unlock(&tmp_pa->pa_lock);
+               goto try_group_pa;
+       }
+
+       if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
+               atomic_inc(&tmp_pa->pa_count);
+               ext4_mb_use_inode_pa(ac, tmp_pa);
+               spin_unlock(&tmp_pa->pa_lock);
+               read_unlock(&ei->i_prealloc_lock);
+               return true;
+       } else {
+               /*
+                * We found a valid overlapping pa but couldn't use it because
+                * it had no free blocks. This should ideally never happen
+                * because:
+                *
+                * 1. When a new inode pa is added to rbtree it must have
+                *    pa_free > 0 since otherwise we won't actually need
+                *    preallocation.
+                *
+                * 2. An inode pa that is in the rbtree can only have its
+                *    pa_free become zero when another thread calls:
+                *      ext4_mb_new_blocks
+                *       ext4_mb_use_preallocated
+                *        ext4_mb_use_inode_pa
+                *
+                * 3. Further, after the above calls make pa_free == 0, we will
+                *    immediately remove it from the rbtree in:
+                *      ext4_mb_new_blocks
+                *       ext4_mb_release_context
+                *        ext4_mb_put_pa
+                *
+                * 4. Since pa_free becoming 0 and the pa getting removed from
+                *    the tree both happen in ext4_mb_new_blocks, which is
+                *    always called with i_data_sem held for data allocations,
+                *    we can be sure that another process will never see a pa
+                *    in the rbtree with pa_free == 0.
+                */
+               WARN_ON_ONCE(tmp_pa->pa_free == 0);
        }
+       spin_unlock(&tmp_pa->pa_lock);
+try_group_pa:
        read_unlock(&ei->i_prealloc_lock);
 
        /* can we use group allocation? */
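
The rewritten lookup above trades one rbtree descent for a four-step search because a concurrent group discard can mark nodes deleted mid-walk, which a naive traversal would then skip past. Stripped of locking and the 2^32 check, the shape is roughly this (entry_of(), ->start and ->deleted stand in for the pa fields; a sketch, not the committed code):

        /* steps 1-2: find the rightmost entry starting at or before key */
        while (n && entry_of(n)->start > key)
                n = rb_prev(n);

        /* step 3: skip left over entries already marked deleted */
        while (n && entry_of(n)->deleted)
                n = rb_prev(n);

        /* step 4: only this neighbour can overlap; under its lock, use it
         * if key < start + len, otherwise fall back to the group pa */

Since PAs never overlap, stopping at the first live left neighbour is sufficient, which is what makes the early try_group_pa exits above correct.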
index 0caf6c7..933ad03 100644 (file)
@@ -2203,7 +2203,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
         * happen is that the times are slightly out of date
         * and/or different from the directory change time.
         */
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        ext4_update_dx_flag(dir);
        inode_inc_iversion(dir);
        err2 = ext4_mark_inode_dirty(handle, dir);
@@ -3197,7 +3197,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
         * recovery. */
        inode->i_size = 0;
        ext4_orphan_add(handle, inode);
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+       dir->i_mtime = inode_set_ctime_current(dir);
+       inode_set_ctime_current(inode);
        retval = ext4_mark_inode_dirty(handle, inode);
        if (retval)
                goto end_rmdir;
@@ -3271,7 +3272,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
                retval = ext4_delete_entry(handle, dir, de, bh);
                if (retval)
                        goto out_handle;
-               dir->i_ctime = dir->i_mtime = current_time(dir);
+               dir->i_mtime = inode_set_ctime_current(dir);
                ext4_update_dx_flag(dir);
                retval = ext4_mark_inode_dirty(handle, dir);
                if (retval)
@@ -3286,7 +3287,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
                drop_nlink(inode);
        if (!inode->i_nlink)
                ext4_orphan_add(handle, inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        retval = ext4_mark_inode_dirty(handle, inode);
        if (dentry && !retval)
                ext4_fc_track_unlink(handle, dentry);
@@ -3463,7 +3464,7 @@ retry:
        if (IS_DIRSYNC(dir))
                ext4_handle_sync(handle);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        ext4_inc_count(inode);
        ihold(inode);
 
@@ -3641,8 +3642,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
        if (ext4_has_feature_filetype(ent->dir->i_sb))
                ent->de->file_type = file_type;
        inode_inc_iversion(ent->dir);
-       ent->dir->i_ctime = ent->dir->i_mtime =
-               current_time(ent->dir);
+       ent->dir->i_mtime = inode_set_ctime_current(ent->dir);
        retval = ext4_mark_inode_dirty(handle, ent->dir);
        BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
        if (!ent->inlined) {
@@ -3941,7 +3941,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       old.inode->i_ctime = current_time(old.inode);
+       inode_set_ctime_current(old.inode);
        retval = ext4_mark_inode_dirty(handle, old.inode);
        if (unlikely(retval))
                goto end_rename;
@@ -3955,9 +3955,9 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
        if (new.inode) {
                ext4_dec_count(new.inode);
-               new.inode->i_ctime = current_time(new.inode);
+               inode_set_ctime_current(new.inode);
        }
-       old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
+       old.dir->i_mtime = inode_set_ctime_current(old.dir);
        ext4_update_dx_flag(old.dir);
        if (old.dir_bh) {
                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
@@ -4053,7 +4053,6 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        };
        u8 new_file_type;
        int retval;
-       struct timespec64 ctime;
 
        if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
             !projid_eq(EXT4_I(new_dir)->i_projid,
@@ -4147,9 +4146,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       ctime = current_time(old.inode);
-       old.inode->i_ctime = ctime;
-       new.inode->i_ctime = ctime;
+       inode_set_ctime_current(old.inode);
+       inode_set_ctime_current(new.inode);
        retval = ext4_mark_inode_dirty(handle, old.inode);
        if (unlikely(retval))
                goto end_rename;
index c94ebf7..73547d2 100644 (file)
@@ -93,6 +93,7 @@ static int ext4_get_tree(struct fs_context *fc);
 static int ext4_reconfigure(struct fs_context *fc);
 static void ext4_fc_free(struct fs_context *fc);
 static int ext4_init_fs_context(struct fs_context *fc);
+static void ext4_kill_sb(struct super_block *sb);
 static const struct fs_parameter_spec ext4_param_specs[];
 
 /*
@@ -135,12 +136,12 @@ static struct file_system_type ext2_fs_type = {
        .name                   = "ext2",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
-       .kill_sb                = kill_block_super,
+       .kill_sb                = ext4_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
 };
 MODULE_ALIAS_FS("ext2");
 MODULE_ALIAS("ext2");
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
 #else
 #define IS_EXT2_SB(sb) (0)
 #endif
@@ -151,12 +152,12 @@ static struct file_system_type ext3_fs_type = {
        .name                   = "ext3",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
-       .kill_sb                = kill_block_super,
+       .kill_sb                = ext4_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV,
 };
 MODULE_ALIAS_FS("ext3");
 MODULE_ALIAS("ext3");
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
 
 
 static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
@@ -1096,15 +1097,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
         */
 }
 
-static void ext4_bdev_mark_dead(struct block_device *bdev)
-{
-       ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH);
-}
-
-static const struct blk_holder_ops ext4_holder_ops = {
-       .mark_dead              = ext4_bdev_mark_dead,
-};
-
 /*
  * Open the external journal device
  */
@@ -1113,7 +1105,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
        struct block_device *bdev;
 
        bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
-                                &ext4_holder_ops);
+                                &fs_holder_ops);
        if (IS_ERR(bdev))
                goto fail;
        return bdev;
@@ -1125,25 +1117,6 @@ fail:
        return NULL;
 }
 
-/*
- * Release the journal device
- */
-static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
-       struct block_device *bdev;
-       bdev = sbi->s_journal_bdev;
-       if (bdev) {
-               /*
-                * Invalidate the journal device's buffers.  We don't want them
-                * floating about in memory - the physical journal device may
-                * hotswapped, and it breaks the `ro-after' testing code.
-                */
-               invalidate_bdev(bdev);
-               blkdev_put(bdev, sbi->s_sb);
-               sbi->s_journal_bdev = NULL;
-       }
-}
-
 static inline struct inode *orphan_list_entry(struct list_head *l)
 {
        return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@@ -1339,8 +1312,13 @@ static void ext4_put_super(struct super_block *sb)
        sync_blockdev(sb->s_bdev);
        invalidate_bdev(sb->s_bdev);
        if (sbi->s_journal_bdev) {
+               /*
+                * Invalidate the journal device's buffers.  We don't want them
+                * floating about in memory - the physical journal device may
+                * be hotswapped, and it breaks the `ro-after' testing code.
+                */
                sync_blockdev(sbi->s_journal_bdev);
-               ext4_blkdev_remove(sbi);
+               invalidate_bdev(sbi->s_journal_bdev);
        }
 
        ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
@@ -5572,7 +5550,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
        spin_lock_init(&sbi->s_bdev_wb_lock);
        errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
                                 &sbi->s_bdev_wb_err);
-       sb->s_bdev->bd_super = sb;
        EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
        ext4_orphan_cleanup(sb, es);
        EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
@@ -5664,9 +5641,11 @@ failed_mount:
                kfree(get_qf_name(sb, sbi, i));
 #endif
        fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
-       /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
        brelse(sbi->s_sbh);
-       ext4_blkdev_remove(sbi);
+       if (sbi->s_journal_bdev) {
+               invalidate_bdev(sbi->s_journal_bdev);
+               blkdev_put(sbi->s_journal_bdev, sb);
+       }
 out_fail:
        invalidate_bdev(sb->s_bdev);
        sb->s_fs_info = NULL;
@@ -5854,7 +5833,10 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
        if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
                return NULL;
 
+       /* see get_tree_bdev for why this is needed and safe */
+       up_write(&sb->s_umount);
        bdev = ext4_blkdev_get(j_dev, sb);
+       down_write(&sb->s_umount);
        if (bdev == NULL)
                return NULL;
 
@@ -7103,7 +7085,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
        }
        EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
        inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        err = ext4_mark_inode_dirty(handle, inode);
        ext4_journal_stop(handle);
 out_unlock:
@@ -7273,13 +7255,24 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
        return 1;
 }
 
+static void ext4_kill_sb(struct super_block *sb)
+{
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL;
+
+       kill_block_super(sb);
+
+       if (journal_bdev)
+               blkdev_put(journal_bdev, sb);
+}
+
 static struct file_system_type ext4_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "ext4",
        .init_fs_context        = ext4_init_fs_context,
        .parameters             = ext4_param_specs,
-       .kill_sb                = kill_block_super,
-       .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+       .kill_sb                = ext4_kill_sb,
+       .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
 };
 MODULE_ALIAS_FS("ext4");
 
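Two related cleanups meet in this file. The private ext4_holder_ops (removed above) existed only to force a shutdown when the journal device died; passing the generic fs_holder_ops to blkdev_get_by_dev() is expected to provide the equivalent ->mark_dead behaviour from common code:

        /* before: a per-filesystem holder (removed in this series) */
        static const struct blk_holder_ops ext4_holder_ops = {
                .mark_dead = ext4_bdev_mark_dead,  /* EXT4_GOING_FLAGS_NOLOGFLUSH */
        };

        /* after: the shared holder ops, passed at open time */
        bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
                                 &fs_holder_ops);

And ext4_kill_sb() now drops the journal bdev reference only after kill_block_super(), once nothing can touch the journal anymore, which is what let ext4_blkdev_remove() go away.
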
index 321e3a8..281e1bf 100644 (file)
@@ -356,13 +356,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
 
 static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
 {
-       return ((u64)ea_inode->i_ctime.tv_sec << 32) |
+       return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) |
                (u32) inode_peek_iversion_raw(ea_inode);
 }
 
 static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
 {
-       ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
+       inode_set_ctime(ea_inode, (u32)(ref_count >> 32), 0);
        inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff);
 }
 
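The ea_inode reference count keeps its unusual home: a 64-bit count split across ctime (upper 32 bits in tv_sec) and the raw i_version (lower 32 bits); the hunk above only reroutes the stores through the ctime accessors. A worked illustration of the packing:

        u64 ref = 0x0000000500000007ULL;

        u32 hi = (u32)(ref >> 32); /* 5: stored via inode_set_ctime(ea_inode, hi, 0) */
        u32 lo = ref & 0xffffffff; /* 7: stored via inode_set_iversion_raw() */

        /* the read side reassembles it, as ext4_xattr_inode_get_ref() does */
        u64 back = ((u64)hi << 32) | lo;   /* 0x0000000500000007 again */
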
@@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
                memmove(here, (void *)here + size,
                        (void *)last - (void *)here + sizeof(__u32));
                memset(last, 0, size);
+
+               /*
+                * Update i_inline_off - moved ibody region might contain
+                * system.data attribute.  Handling a failure here won't
+                * cause other complications for setting an xattr.
+                */
+               if (!is_block && ext4_has_inline_data(inode)) {
+                       ret = ext4_find_inline_data_nolock(inode);
+                       if (ret) {
+                               ext4_warning_inode(inode,
+                                       "unable to update i_inline_off");
+                               goto out;
+                       }
+               }
        } else if (s->not_found) {
                /* Insert new name. */
                size_t size = EXT4_XATTR_LEN(name_len);
@@ -2459,7 +2473,7 @@ retry_inode:
        }
        if (!error) {
                ext4_xattr_update_super_block(handle, inode->i_sb);
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                inode_inc_iversion(inode);
                if (!value)
                        no_expand = 0;
index 236d890..0f7df9c 100644 (file)
@@ -1045,7 +1045,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
        struct address_space *mapping = cc->inode->i_mapping;
        struct page *page;
        sector_t last_block_in_bio;
-       unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
+       fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
        pgoff_t start_idx = start_idx_of_cluster(cc);
        int i, ret;
 
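The unsigned-int-to-fgf_t changes here and in f2fs.h below track a page cache API change: the FGP_* flags now have a dedicated __bitwise type so that sparse can flag mixed-up flag arguments. A caller sketch under that assumption:

        fgf_t fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT;
        struct page *page = pagecache_get_page(mapping, index, fgp_flags,
                                               mapping_gfp_mask(mapping));
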
index d635c58..8aa29fe 100644 (file)
@@ -455,7 +455,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
        de->file_type = fs_umode_to_ftype(inode->i_mode);
        set_page_dirty(page);
 
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        f2fs_mark_inode_dirty_sync(dir, false);
        f2fs_put_page(page, 1);
 }
@@ -609,7 +609,7 @@ void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
                        f2fs_i_links_write(dir, true);
                clear_inode_flag(inode, FI_NEW_INODE);
        }
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        f2fs_mark_inode_dirty_sync(dir, false);
 
        if (F2FS_I(dir)->i_current_depth != current_depth)
@@ -858,7 +858,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
 
        if (S_ISDIR(inode->i_mode))
                f2fs_i_links_write(dir, false);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
 
        f2fs_i_links_write(inode, false);
        if (S_ISDIR(inode->i_mode)) {
@@ -919,7 +919,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
        }
        f2fs_put_page(page, 1);
 
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        f2fs_mark_inode_dirty_sync(dir, false);
 
        if (inode)
index c7cb217..6131323 100644 (file)
@@ -2736,7 +2736,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
 
 static inline struct page *f2fs_pagecache_get_page(
                                struct address_space *mapping, pgoff_t index,
-                               int fgp_flags, gfp_t gfp_mask)
+                               fgf_t fgp_flags, gfp_t gfp_mask)
 {
        if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET))
                return NULL;
@@ -3303,9 +3303,11 @@ static inline void clear_file(struct inode *inode, int type)
 
 static inline bool f2fs_is_time_consistent(struct inode *inode)
 {
+       struct timespec64 ctime = inode_get_ctime(inode);
+
        if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
                return false;
-       if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
+       if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ctime))
                return false;
        if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
                return false;
index 093039d..35886a5 100644 (file)
@@ -794,7 +794,7 @@ int f2fs_truncate(struct inode *inode)
        if (err)
                return err;
 
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        f2fs_mark_inode_dirty_sync(inode, false);
        return 0;
 }
@@ -882,7 +882,7 @@ int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
                                  STATX_ATTR_NODUMP |
                                  STATX_ATTR_VERITY);
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
 
        /* we need to show initial sectors used for inline_data/dentries */
        if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -905,7 +905,7 @@ static void __setattr_copy(struct mnt_idmap *idmap,
        if (ia_valid & ATTR_MTIME)
                inode->i_mtime = attr->ia_mtime;
        if (ia_valid & ATTR_CTIME)
-               inode->i_ctime = attr->ia_ctime;
+               inode_set_ctime_to_ts(inode, attr->ia_ctime);
        if (ia_valid & ATTR_MODE) {
                umode_t mode = attr->ia_mode;
                vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
@@ -1008,7 +1008,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
                        return err;
 
                spin_lock(&F2FS_I(inode)->i_size_lock);
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                F2FS_I(inode)->last_disk_size = i_size_read(inode);
                spin_unlock(&F2FS_I(inode)->i_size_lock);
        }
@@ -1835,7 +1835,7 @@ static long f2fs_fallocate(struct file *file, int mode,
        }
 
        if (!ret) {
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                f2fs_mark_inode_dirty_sync(inode, false);
                f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
        }
@@ -1937,7 +1937,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
        else
                clear_inode_flag(inode, FI_PROJ_INHERIT);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        f2fs_set_inode_flags(inode);
        f2fs_mark_inode_dirty_sync(inode, true);
        return 0;
@@ -2874,10 +2874,10 @@ out_src:
        if (ret)
                goto out_unlock;
 
-       src->i_mtime = src->i_ctime = current_time(src);
+       src->i_mtime = inode_set_ctime_current(src);
        f2fs_mark_inode_dirty_sync(src, false);
        if (src != dst) {
-               dst->i_mtime = dst->i_ctime = current_time(dst);
+               dst->i_mtime = inode_set_ctime_current(dst);
                f2fs_mark_inode_dirty_sync(dst, false);
        }
        f2fs_update_time(sbi, REQ_TIME);
@@ -3073,7 +3073,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
                goto out_unlock;
 
        fi->i_projid = kprojid;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        f2fs_mark_inode_dirty_sync(inode, true);
 out_unlock:
        f2fs_unlock_op(sbi);
@@ -3511,7 +3511,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
        }
 
        set_inode_flag(inode, FI_COMPRESS_RELEASED);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        f2fs_mark_inode_dirty_sync(inode, true);
 
        f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -3710,7 +3710,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
 
        if (ret >= 0) {
                clear_inode_flag(inode, FI_COMPRESS_RELEASED);
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                f2fs_mark_inode_dirty_sync(inode, true);
        }
 unlock_inode:
index 01effd3..a1ca394 100644 (file)
@@ -2181,12 +2181,14 @@ out_drop_write:
        if (err)
                return err;
 
-       err = freeze_super(sbi->sb);
+       err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
        if (err)
                return err;
 
        if (f2fs_readonly(sbi->sb)) {
-               thaw_super(sbi->sb);
+               err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+               if (err)
+                       return err;
                return -EROFS;
        }
 
@@ -2240,6 +2242,6 @@ recover_out:
 out_err:
        f2fs_up_write(&sbi->cp_global_sem);
        f2fs_up_write(&sbi->gc_lock);
-       thaw_super(sbi->sb);
+       thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
        return err;
 }
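
freeze_super() and thaw_super() now take a holder argument so kernel- and userspace-initiated freezes can be told apart; the f2fs resize path identifies itself as FREEZE_HOLDER_USERSPACE. The balanced pair under the new API, as used above:

        err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
        if (err)
                return err;

        /* ... exclusive resize work ... */

        err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);

As a side effect, the read-only bail-out above now also propagates a thaw_super() failure instead of silently ignoring it.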
index 4638fee..88fc920 100644 (file)
@@ -698,7 +698,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
        set_page_dirty(page);
        f2fs_put_page(page, 1);
 
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        f2fs_mark_inode_dirty_sync(dir, false);
 
        if (inode)
index 09e986b..c1c2ba9 100644 (file)
@@ -403,7 +403,7 @@ static void init_idisk_time(struct inode *inode)
        struct f2fs_inode_info *fi = F2FS_I(inode);
 
        fi->i_disk_time[0] = inode->i_atime;
-       fi->i_disk_time[1] = inode->i_ctime;
+       fi->i_disk_time[1] = inode_get_ctime(inode);
        fi->i_disk_time[2] = inode->i_mtime;
 }
 
@@ -434,10 +434,10 @@ static int do_read_inode(struct inode *inode)
        inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
 
        inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
-       inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
+       inode_set_ctime(inode, le64_to_cpu(ri->i_ctime),
+                       le32_to_cpu(ri->i_ctime_nsec));
        inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
        inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
-       inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
        inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
        inode->i_generation = le32_to_cpu(ri->i_generation);
        if (S_ISDIR(inode->i_mode))
@@ -714,10 +714,10 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
        set_raw_inline(inode, ri);
 
        ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
-       ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+       ri->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
        ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
        ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-       ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
        if (S_ISDIR(inode->i_mode))
                ri->i_current_depth =
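
With i_ctime no longer written directly, the on-disk round-trip in the inode hunks above pairs inode_set_ctime(), which takes seconds and nanoseconds separately, with inode_get_ctime(), which returns a struct timespec64. Condensed from the do_read_inode()/f2fs_update_inode() changes:

	/* load: raw little-endian fields -> in-core ctime */
	inode_set_ctime(inode, le64_to_cpu(ri->i_ctime),
			le32_to_cpu(ri->i_ctime_nsec));

	/* store: in-core ctime -> raw little-endian fields */
	ri->i_ctime      = cpu_to_le64(inode_get_ctime(inode).tv_sec);
	ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
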
index bee0568..193b22a 100644 (file)
@@ -243,7 +243,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
 
        inode->i_ino = ino;
        inode->i_blocks = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        F2FS_I(inode)->i_crtime = inode->i_mtime;
        inode->i_generation = get_random_u32();
 
@@ -420,7 +420,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
        f2fs_balance_fs(sbi, true);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        ihold(inode);
 
        set_inode_flag(inode, FI_INC_LINK);
@@ -1052,7 +1052,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                f2fs_set_link(new_dir, new_entry, new_page, old_inode);
                new_page = NULL;
 
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                f2fs_down_write(&F2FS_I(new_inode)->i_sem);
                if (old_dir_entry)
                        f2fs_i_links_write(new_inode, false);
@@ -1086,7 +1086,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                f2fs_i_pino_write(old_inode, new_dir->i_ino);
        f2fs_up_write(&F2FS_I(old_inode)->i_sem);
 
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
        f2fs_mark_inode_dirty_sync(old_inode, false);
 
        f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
@@ -1251,7 +1251,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                f2fs_i_pino_write(old_inode, new_dir->i_ino);
        f2fs_up_write(&F2FS_I(old_inode)->i_sem);
 
-       old_dir->i_ctime = current_time(old_dir);
+       inode_set_ctime_current(old_dir);
        if (old_nlink) {
                f2fs_down_write(&F2FS_I(old_dir)->i_sem);
                f2fs_i_links_write(old_dir, old_nlink > 0);
@@ -1270,7 +1270,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
                f2fs_i_pino_write(new_inode, old_dir->i_ino);
        f2fs_up_write(&F2FS_I(new_inode)->i_sem);
 
-       new_dir->i_ctime = current_time(new_dir);
+       inode_set_ctime_current(new_dir);
        if (new_nlink) {
                f2fs_down_write(&F2FS_I(new_dir)->i_sem);
                f2fs_i_links_write(new_dir, new_nlink > 0);
index 4e7d4ce..b8637e8 100644 (file)
@@ -321,10 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page)
 
        f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
        inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
-       inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
+       inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
+                       le32_to_cpu(raw->i_ctime_nsec));
        inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
        inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
-       inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
        inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
 
        F2FS_I(inode)->i_advise = raw->i_advise;
index ca31163..aa1f9a3 100644 (file)
@@ -1561,7 +1561,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
        int i;
 
        for (i = 0; i < sbi->s_ndevs; i++) {
-               blkdev_put(FDEV(i).bdev, sbi->sb->s_type);
+               blkdev_put(FDEV(i).bdev, sbi->sb);
 #ifdef CONFIG_BLK_DEV_ZONED
                kvfree(FDEV(i).blkz_seq);
 #endif
@@ -2703,7 +2703,7 @@ retry:
 
        if (len == towrite)
                return err;
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        f2fs_mark_inode_dirty_sync(inode, false);
        return len - towrite;
 }
@@ -4198,7 +4198,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
                        /* Single zoned block device mount */
                        FDEV(0).bdev =
                                blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode,
-                                                 sbi->sb->s_type, NULL);
+                                                 sbi->sb, NULL);
                } else {
                        /* Multi-device mount */
                        memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
@@ -4217,8 +4217,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
                                        sbi->log_blocks_per_seg) - 1;
                        }
                        FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode,
-                                                         sbi->sb->s_type,
-                                                         NULL);
+                                                         sbi->sb, NULL);
                }
                if (IS_ERR(FDEV(i).bdev))
                        return PTR_ERR(FDEV(i).bdev);
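
The super.c hunks above change the block-device "holder" from sbi->sb->s_type to the super_block itself; acquisition and release must name the same holder. A sketch under that assumption:

	/* acquire with the superblock as holder ... */
	bdev = blkdev_get_by_path(path, mode, sb, NULL);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	/* ... and release naming the identical holder */
	blkdev_put(bdev, sb);
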
index 476b186..4ae93e1 100644 (file)
@@ -764,7 +764,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
 same:
        if (is_inode_flag_set(inode, FI_ACL_MODE)) {
                inode->i_mode = F2FS_I(inode)->i_acl_mode;
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                clear_inode_flag(inode, FI_ACL_MODE);
        }
 
index e3b690b..66cf477 100644 (file)
@@ -460,8 +460,7 @@ extern struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi,
                                            const struct timespec64 *ts);
 extern int fat_truncate_time(struct inode *inode, struct timespec64 *now,
                             int flags);
-extern int fat_update_time(struct inode *inode, struct timespec64 *now,
-                          int flags);
+extern int fat_update_time(struct inode *inode, int flags);
 extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
 
 int fat_cache_init(void);
index 4564779..e887e9a 100644 (file)
@@ -401,7 +401,7 @@ int fat_getattr(struct mnt_idmap *idmap, const struct path *path,
        struct inode *inode = d_inode(path->dentry);
        struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
        stat->blksize = sbi->cluster_size;
 
        if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) {
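
generic_fillattr() now takes the request_mask (the STATX_* bits the caller asked for), as seen here and again in the fuse and gfs2 getattr hunks below, so common code can skip fields nobody requested. A sketch of a ->getattr using the new signature (foo_getattr is a hypothetical example, not code from this series):

	static int foo_getattr(struct mnt_idmap *idmap, const struct path *path,
			       struct kstat *stat, u32 request_mask,
			       unsigned int query_flags)
	{
		struct inode *inode = d_inode(path->dentry);

		/* forward the mask so only requested fields get filled */
		generic_fillattr(idmap, request_mask, inode, stat);
		return 0;
	}
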
index d99b854..cdd39b6 100644 (file)
@@ -562,7 +562,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                           & ~((loff_t)sbi->cluster_size - 1)) >> 9;
 
        fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
-       inode->i_ctime = inode->i_mtime;
+       inode_set_ctime_to_ts(inode, inode->i_mtime);
        if (sbi->options.isvfat) {
                fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
                fat_time_fat2unix(sbi, &MSDOS_I(inode)->i_crtime, de->ctime,
@@ -1407,8 +1407,7 @@ static int fat_read_root(struct inode *inode)
        MSDOS_I(inode)->mmu_private = inode->i_size;
 
        fat_save_attrs(inode, ATTR_DIR);
-       inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
-       inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+       inode->i_mtime = inode->i_atime = inode_set_ctime(inode, 0, 0);
        set_nlink(inode, fat_subdirs(inode)+2);
 
        return 0;
index 7e5d6ae..f2304a1 100644 (file)
@@ -332,13 +332,14 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
         * but ctime updates are ignored.
         */
        if (flags & S_MTIME)
-               inode->i_mtime = inode->i_ctime = fat_truncate_mtime(sbi, now);
+               inode->i_mtime = inode_set_ctime_to_ts(inode,
+                                                      fat_truncate_mtime(sbi, now));
 
        return 0;
 }
 EXPORT_SYMBOL_GPL(fat_truncate_time);
 
-int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
+int fat_update_time(struct inode *inode, int flags)
 {
        int dirty_flags = 0;
 
@@ -346,16 +347,13 @@ int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
                return 0;
 
        if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
-               fat_truncate_time(inode, now, flags);
+               fat_truncate_time(inode, NULL, flags);
                if (inode->i_sb->s_flags & SB_LAZYTIME)
                        dirty_flags |= I_DIRTY_TIME;
                else
                        dirty_flags |= I_DIRTY_SYNC;
        }
 
-       if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
-               dirty_flags |= I_DIRTY_SYNC;
-
        __mark_inode_dirty(inode, dirty_flags);
        return 0;
 }
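
Two things change in fat_update_time(): the timespec64 parameter is gone (the new ->update_time contract passes only the inode and the S_* flags, and fat_truncate_time() now receives NULL, which, judging by the call, falls back to the current time), and the S_VERSION branch is dropped since i_version bumping no longer happens in this path. A sketch of the slimmer hook (foo_update_time is hypothetical):

	static int foo_update_time(struct inode *inode, int flags)
	{
		/* no timestamp argument any more; helpers such as
		 * inode_set_ctime_current() fetch the time themselves */
		if (flags & S_CTIME)
			inode_set_ctime_current(inode);

		__mark_inode_dirty(inode, I_DIRTY_SYNC);
		return 0;
	}
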
index b622be1..e871009 100644 (file)
@@ -34,7 +34,7 @@
 
 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
-static int setfl(int fd, struct file * filp, unsigned long arg)
+static int setfl(int fd, struct file * filp, unsigned int arg)
 {
        struct inode * inode = file_inode(filp);
        int error = 0;
@@ -112,11 +112,11 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 }
 EXPORT_SYMBOL(__f_setown);
 
-int f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, int who, int force)
 {
        enum pid_type type;
        struct pid *pid = NULL;
-       int who = arg, ret = 0;
+       int ret = 0;
 
        type = PIDTYPE_TGID;
        if (who < 0) {
@@ -317,28 +317,29 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                struct file *filp)
 {
        void __user *argp = (void __user *)arg;
+       int argi = (int)arg;
        struct flock flock;
        long err = -EINVAL;
 
        switch (cmd) {
        case F_DUPFD:
-               err = f_dupfd(arg, filp, 0);
+               err = f_dupfd(argi, filp, 0);
                break;
        case F_DUPFD_CLOEXEC:
-               err = f_dupfd(arg, filp, O_CLOEXEC);
+               err = f_dupfd(argi, filp, O_CLOEXEC);
                break;
        case F_GETFD:
                err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
                break;
        case F_SETFD:
                err = 0;
-               set_close_on_exec(fd, arg & FD_CLOEXEC);
+               set_close_on_exec(fd, argi & FD_CLOEXEC);
                break;
        case F_GETFL:
                err = filp->f_flags;
                break;
        case F_SETFL:
-               err = setfl(fd, filp, arg);
+               err = setfl(fd, filp, argi);
                break;
 #if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
@@ -375,7 +376,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                force_successful_syscall_return();
                break;
        case F_SETOWN:
-               err = f_setown(filp, arg, 1);
+               err = f_setown(filp, argi, 1);
                break;
        case F_GETOWN_EX:
                err = f_getown_ex(filp, arg);
@@ -391,28 +392,28 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                break;
        case F_SETSIG:
                /* arg == 0 restores default behaviour. */
-               if (!valid_signal(arg)) {
+               if (!valid_signal(argi)) {
                        break;
                }
                err = 0;
-               filp->f_owner.signum = arg;
+               filp->f_owner.signum = argi;
                break;
        case F_GETLEASE:
                err = fcntl_getlease(filp);
                break;
        case F_SETLEASE:
-               err = fcntl_setlease(fd, filp, arg);
+               err = fcntl_setlease(fd, filp, argi);
                break;
        case F_NOTIFY:
-               err = fcntl_dirnotify(fd, filp, arg);
+               err = fcntl_dirnotify(fd, filp, argi);
                break;
        case F_SETPIPE_SZ:
        case F_GETPIPE_SZ:
-               err = pipe_fcntl(filp, cmd, arg);
+               err = pipe_fcntl(filp, cmd, argi);
                break;
        case F_ADD_SEALS:
        case F_GET_SEALS:
-               err = memfd_fcntl(filp, cmd, arg);
+               err = memfd_fcntl(filp, cmd, argi);
                break;
        case F_GET_RW_HINT:
        case F_SET_RW_HINT:
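
The fcntl.c changes narrow the raw unsigned long argument to int exactly once (argi) and hand that to every command that semantically takes an int. The sign matters: F_SETOWN treats a negative value as "minus a process-group ID", which only works once the 64-bit value has been truncated back to the int userspace passed. An illustrative walk-through of the sign semantics:

	unsigned long arg = (unsigned long)-42;	/* userspace passed -42 */
	int who = (int)arg;			/* who == -42 again */

	if (who < 0) {
		/* negative: the owner is process group 42 (-who),
		 * mirroring the f_setown() logic above */
	}
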
index 7893ea1..3e4a4df 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */
 
 /**
  * last_fd - return last valid index into fd table
- * @cur_fds: files struct
+ * @fdt: File descriptor table.
  *
  * Context: Either rcu read lock or files_lock must be held.
  *
@@ -693,29 +693,30 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
        spin_unlock(&cur_fds->file_lock);
 }
 
-static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+static inline void __range_close(struct files_struct *files, unsigned int fd,
                                 unsigned int max_fd)
 {
+       struct file *file;
        unsigned n;
 
-       rcu_read_lock();
-       n = last_fd(files_fdtable(cur_fds));
-       rcu_read_unlock();
+       spin_lock(&files->file_lock);
+       n = last_fd(files_fdtable(files));
        max_fd = min(max_fd, n);
 
-       while (fd <= max_fd) {
-               struct file *file;
-
-               spin_lock(&cur_fds->file_lock);
-               file = pick_file(cur_fds, fd++);
-               spin_unlock(&cur_fds->file_lock);
-
+       for (; fd <= max_fd; fd++) {
+               file = pick_file(files, fd);
                if (file) {
-                       /* found a valid file to close */
-                       filp_close(file, cur_fds);
+                       spin_unlock(&files->file_lock);
+                       filp_close(file, files);
                        cond_resched();
+                       spin_lock(&files->file_lock);
+               } else if (need_resched()) {
+                       spin_unlock(&files->file_lock);
+                       cond_resched();
+                       spin_lock(&files->file_lock);
                }
        }
+       spin_unlock(&files->file_lock);
 }
 
 /**
@@ -723,6 +724,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
  *
  * @fd:     starting file descriptor to close
  * @max_fd: last file descriptor to close
+ * @flags:  CLOSE_RANGE flags.
  *
  * This closes a range of file descriptors. All file descriptors
  * from @fd up to and including @max_fd are closed.
@@ -1036,16 +1038,30 @@ unsigned long __fdget_raw(unsigned int fd)
        return __fget_light(fd, 0);
 }
 
+/*
+ * Try to avoid f_pos locking. We only need it if the
+ * file is marked for FMODE_ATOMIC_POS, and it can be
+ * accessed multiple ways.
+ *
+ * Always do it for directories, because pidfd_getfd()
+ * can make a file accessible even if it otherwise would
+ * not be, and for directories this is a correctness
+ * issue, not a "POSIX requirement".
+ */
+static inline bool file_needs_f_pos_lock(struct file *file)
+{
+       return (file->f_mode & FMODE_ATOMIC_POS) &&
+               (file_count(file) > 1 || file->f_op->iterate_shared);
+}
+
 unsigned long __fdget_pos(unsigned int fd)
 {
        unsigned long v = __fdget(fd);
        struct file *file = (struct file *)(v & ~3);
 
-       if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
-               if (file_count(file) > 1) {
-                       v |= FDPUT_POS_UNLOCK;
-                       mutex_lock(&file->f_pos_lock);
-               }
+       if (file && file_needs_f_pos_lock(file)) {
+               v |= FDPUT_POS_UNLOCK;
+               mutex_lock(&file->f_pos_lock);
        }
        return v;
 }
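
The rewritten __fdget_pos() funnels the decision through file_needs_f_pos_lock(): the f_pos mutex is now taken whenever the file is shared, or iterates as a directory, per the comment above. A sketch of how a read-style path would sit on top of it, assuming the usual fdget_pos()/fdput_pos() wrappers around this helper:

	struct fd f = fdget_pos(fd);

	if (!f.file)
		return -EBADF;
	/* f_pos is stable here; the lock was taken only when
	 * file_needs_f_pos_lock() said it mattered */
	pos = f.file->f_pos;
	fdput_pos(f);
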
index fc7d677..ee21b3d 100644 (file)
@@ -461,11 +461,8 @@ void fput(struct file *file)
  */
 void __fput_sync(struct file *file)
 {
-       if (atomic_long_dec_and_test(&file->f_count)) {
-               struct task_struct *task = current;
-               BUG_ON(!(task->flags & PF_KTHREAD));
+       if (atomic_long_dec_and_test(&file->f_count))
                __fput(file);
-       }
 }
 
 EXPORT_SYMBOL(fput);
index ceb6a12..ac5d43b 100644 (file)
@@ -110,10 +110,9 @@ static inline void dip2vip_cpy(struct vxfs_sb_info *sbi,
        inode->i_size = vip->vii_size;
 
        inode->i_atime.tv_sec = vip->vii_atime;
-       inode->i_ctime.tv_sec = vip->vii_ctime;
+       inode_set_ctime(inode, vip->vii_ctime, 0);
        inode->i_mtime.tv_sec = vip->vii_mtime;
        inode->i_atime.tv_nsec = 0;
-       inode->i_ctime.tv_nsec = 0;
        inode->i_mtime.tv_nsec = 0;
 
        inode->i_blocks = vip->vii_blocks;
index aca4b48..969ce99 100644 (file)
@@ -1953,9 +1953,9 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
                struct inode *inode = wb_inode(wb->b_io.prev);
                struct super_block *sb = inode->i_sb;
 
-               if (!trylock_super(sb)) {
+               if (!super_trylock_shared(sb)) {
                        /*
-                        * trylock_super() may fail consistently due to
+                        * super_trylock_shared() may fail consistently due to
                         * s_umount being grabbed by someone else. Don't use
                         * requeue_io() to avoid busy retrying the inode/sb.
                         */
index 851214d..a0ad7a0 100644 (file)
@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param);
 
 /**
  * vfs_parse_fs_string - Convenience function to just parse a string.
+ * @fc: Filesystem context.
+ * @key: Parameter name.
+ * @value: Default value.
+ * @v_size: Maximum number of bytes in the value.
  */
 int vfs_parse_fs_string(struct fs_context *fc, const char *key,
                        const char *value, size_t v_size)
@@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string);
 
 /**
  * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
- * @ctx: The superblock configuration to fill in.
+ * @fc: The superblock configuration to fill in.
  * @data: The data to parse
  *
  * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
@@ -315,10 +319,31 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
 }
 EXPORT_SYMBOL(fs_context_for_reconfigure);
 
+/**
+ * fs_context_for_submount: allocate a new fs_context for a submount
+ * @type: file_system_type of the new context
+ * @reference: reference dentry from which to copy relevant info
+ *
+ * Allocate a new fs_context suitable for a submount. This also ensures that
+ * the fc->security object is inherited from @reference (if needed).
+ */
 struct fs_context *fs_context_for_submount(struct file_system_type *type,
                                           struct dentry *reference)
 {
-       return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+       struct fs_context *fc;
+       int ret;
+
+       fc = alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+       if (IS_ERR(fc))
+               return fc;
+
+       ret = security_fs_context_submount(fc, reference->d_sb);
+       if (ret) {
+               put_fs_context(fc);
+               return ERR_PTR(ret);
+       }
+
+       return fc;
 }
 EXPORT_SYMBOL(fs_context_for_submount);
 
@@ -333,7 +358,7 @@ void fc_drop_locked(struct fs_context *fc)
 static void legacy_fs_context_free(struct fs_context *fc);
 
 /**
- * vfs_dup_fc_config: Duplicate a filesystem context.
+ * vfs_dup_fs_context - Duplicate a filesystem context.
  * @src_fc: The context to copy.
  */
 struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
@@ -379,7 +404,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
 
 /**
  * logfc - Log a message to a filesystem context
- * @fc: The filesystem context to log to.
+ * @log: The filesystem context to log to, or NULL to use printk.
+ * @prefix: A string to prefix the output with, or NULL.
+ * @level: 'w' for a warning, 'e' for an error.  Anything else is a notice.
  * @fmt: The format of the buffer.
  */
 void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
@@ -692,6 +719,7 @@ void vfs_clean_context(struct fs_context *fc)
        security_free_mnt_opts(&fc->security);
        kfree(fc->source);
        fc->source = NULL;
+       fc->exclusive = false;
 
        fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
        fc->phase = FS_CONTEXT_AWAITING_RECONF;
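
With the hunk above, fs_context_for_submount() hands back a context whose security blob has already been primed from the reference superblock, or an ERR_PTR if the LSM refused. A hedged sketch of a typical caller (foo_fs_type and mntpt are placeholders; fc_mount() is assumed from the existing mount API):

	struct fs_context *fc;
	struct vfsmount *mnt;

	fc = fs_context_for_submount(&foo_fs_type, mntpt);
	if (IS_ERR(fc))
		return ERR_CAST(fc);

	/* ... set fs-specific parameters on fc ... */

	mnt = fc_mount(fc);
	put_fs_context(fc);
	return mnt;
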
index fc9d2d9..ce03f65 100644 (file)
@@ -209,6 +209,72 @@ err:
        return ret;
 }
 
+static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
+{
+       struct super_block *sb;
+       int ret;
+
+       if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
+               return -EBUSY;
+
+       if (!mount_capable(fc))
+               return -EPERM;
+
+       /* require the new mount api */
+       if (exclusive && fc->ops == &legacy_fs_context_ops)
+               return -EOPNOTSUPP;
+
+       fc->phase = FS_CONTEXT_CREATING;
+       fc->exclusive = exclusive;
+
+       ret = vfs_get_tree(fc);
+       if (ret) {
+               fc->phase = FS_CONTEXT_FAILED;
+               return ret;
+       }
+
+       sb = fc->root->d_sb;
+       ret = security_sb_kern_mount(sb);
+       if (unlikely(ret)) {
+               fc_drop_locked(fc);
+               fc->phase = FS_CONTEXT_FAILED;
+               return ret;
+       }
+
+       /* vfs_get_tree() callchains will have grabbed @s_umount */
+       up_write(&sb->s_umount);
+       fc->phase = FS_CONTEXT_AWAITING_MOUNT;
+       return 0;
+}
+
+static int vfs_cmd_reconfigure(struct fs_context *fc)
+{
+       struct super_block *sb;
+       int ret;
+
+       if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
+               return -EBUSY;
+
+       fc->phase = FS_CONTEXT_RECONFIGURING;
+
+       sb = fc->root->d_sb;
+       if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
+               fc->phase = FS_CONTEXT_FAILED;
+               return -EPERM;
+       }
+
+       down_write(&sb->s_umount);
+       ret = reconfigure_super(fc);
+       up_write(&sb->s_umount);
+       if (ret) {
+               fc->phase = FS_CONTEXT_FAILED;
+               return ret;
+       }
+
+       vfs_clean_context(fc);
+       return 0;
+}
+
 /*
  * Check the state and apply the configuration.  Note that this function is
  * allowed to 'steal' the value by setting param->xxx to NULL before returning.
@@ -216,7 +282,6 @@ err:
 static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
                               struct fs_parameter *param)
 {
-       struct super_block *sb;
        int ret;
 
        ret = finish_clean_context(fc);
@@ -224,39 +289,11 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
                return ret;
        switch (cmd) {
        case FSCONFIG_CMD_CREATE:
-               if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
-                       return -EBUSY;
-               if (!mount_capable(fc))
-                       return -EPERM;
-               fc->phase = FS_CONTEXT_CREATING;
-               ret = vfs_get_tree(fc);
-               if (ret)
-                       break;
-               sb = fc->root->d_sb;
-               ret = security_sb_kern_mount(sb);
-               if (unlikely(ret)) {
-                       fc_drop_locked(fc);
-                       break;
-               }
-               up_write(&sb->s_umount);
-               fc->phase = FS_CONTEXT_AWAITING_MOUNT;
-               return 0;
+               return vfs_cmd_create(fc, false);
+       case FSCONFIG_CMD_CREATE_EXCL:
+               return vfs_cmd_create(fc, true);
        case FSCONFIG_CMD_RECONFIGURE:
-               if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
-                       return -EBUSY;
-               fc->phase = FS_CONTEXT_RECONFIGURING;
-               sb = fc->root->d_sb;
-               if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
-                       ret = -EPERM;
-                       break;
-               }
-               down_write(&sb->s_umount);
-               ret = reconfigure_super(fc);
-               up_write(&sb->s_umount);
-               if (ret)
-                       break;
-               vfs_clean_context(fc);
-               return 0;
+               return vfs_cmd_reconfigure(fc);
        default:
                if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
                    fc->phase != FS_CONTEXT_RECONF_PARAMS)
@@ -264,8 +301,6 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
 
                return vfs_parse_fs_param(fc, param);
        }
-       fc->phase = FS_CONTEXT_FAILED;
-       return ret;
 }
 
 /**
@@ -353,6 +388,7 @@ SYSCALL_DEFINE5(fsconfig,
                        return -EINVAL;
                break;
        case FSCONFIG_CMD_CREATE:
+       case FSCONFIG_CMD_CREATE_EXCL:
        case FSCONFIG_CMD_RECONFIGURE:
                if (_key || _value || aux)
                        return -EINVAL;
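
FSCONFIG_CMD_CREATE_EXCL behaves like CREATE (no key/value/aux allowed) but sets fc->exclusive to demand a brand-new superblock, and is rejected outright for filesystems still on the legacy context ops. A hedged userspace sketch; the command value and the EBUSY outcome are assumptions based on this release's uapi header and the exclusive-create intent, not shown in this hunk:

	#include <unistd.h>
	#include <sys/syscall.h>

	#ifndef FSCONFIG_CMD_CREATE_EXCL
	#define FSCONFIG_CMD_CREATE_EXCL 8	/* assumed uapi value */
	#endif

	/* fsfd came from fsopen(), parameters already set via fsconfig() */
	if (syscall(SYS_fsconfig, fsfd, FSCONFIG_CMD_CREATE_EXCL,
		    NULL, NULL, 0) == -1) {
		/* EBUSY would indicate a matching superblock already
		 * exists and exclusive creation was refused */
	}
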
index 247ef4f..ab62e46 100644 (file)
@@ -235,7 +235,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
        inode->i_mode = mode;
        inode->i_uid = fc->user_id;
        inode->i_gid = fc->group_id;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        /* setting ->i_op to NULL is not allowed */
        if (iop)
                inode->i_op = iop;
index 35bc174..881524b 100644 (file)
@@ -258,7 +258,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
                        spin_unlock(&fi->lock);
                }
                kfree(forget);
-               if (ret == -ENOMEM)
+               if (ret == -ENOMEM || ret == -EINTR)
                        goto out;
                if (ret || fuse_invalid_attr(&outarg.attr) ||
                    fuse_stale_inode(inode, outarg.generation, &outarg.attr))
@@ -395,8 +395,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
                goto out_put_forget;
 
        err = -EIO;
-       if (!outarg->nodeid)
-               goto out_put_forget;
        if (fuse_invalid_attr(&outarg->attr))
                goto out_put_forget;
 
@@ -935,7 +933,7 @@ void fuse_flush_time_update(struct inode *inode)
 static void fuse_update_ctime_in_cache(struct inode *inode)
 {
        if (!IS_NOCMTIME(inode)) {
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                mark_inode_dirty_sync(inode);
                fuse_flush_time_update(inode);
        }
@@ -1224,7 +1222,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
                forget_all_cached_acls(inode);
                err = fuse_do_getattr(inode, stat, file);
        } else if (stat) {
-               generic_fillattr(&nop_mnt_idmap, inode, stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
                stat->mode = fi->orig_i_mode;
                stat->ino = fi->orig_ino;
        }
@@ -1717,8 +1715,8 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
        inarg.mtimensec = inode->i_mtime.tv_nsec;
        if (fm->fc->minor >= 23) {
                inarg.valid |= FATTR_CTIME;
-               inarg.ctime = inode->i_ctime.tv_sec;
-               inarg.ctimensec = inode->i_ctime.tv_nsec;
+               inarg.ctime = inode_get_ctime(inode).tv_sec;
+               inarg.ctimensec = inode_get_ctime(inode).tv_nsec;
        }
        if (ff) {
                inarg.valid |= FATTR_FH;
@@ -1859,7 +1857,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
                if (attr->ia_valid & ATTR_MTIME)
                        inode->i_mtime = attr->ia_mtime;
                if (attr->ia_valid & ATTR_CTIME)
-                       inode->i_ctime = attr->ia_ctime;
+                       inode_set_ctime_to_ts(inode, attr->ia_ctime);
                /* FIXME: clear I_DIRTY_SYNC? */
        }
 
index d66070a..549358f 100644 (file)
@@ -194,8 +194,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
                inode->i_mtime.tv_nsec  = attr->mtimensec;
        }
        if (!(cache_mask & STATX_CTIME)) {
-               inode->i_ctime.tv_sec   = attr->ctime;
-               inode->i_ctime.tv_nsec  = attr->ctimensec;
+               inode_set_ctime(inode, attr->ctime, attr->ctimensec);
        }
 
        if (attr->blksize != 0)
@@ -259,8 +258,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
                attr->mtimensec = inode->i_mtime.tv_nsec;
        }
        if (cache_mask & STATX_CTIME) {
-               attr->ctime = inode->i_ctime.tv_sec;
-               attr->ctimensec = inode->i_ctime.tv_nsec;
+               attr->ctime = inode_get_ctime(inode).tv_sec;
+               attr->ctimensec = inode_get_ctime(inode).tv_nsec;
        }
 
        if ((attr_version != 0 && fi->attr_version > attr_version) ||
@@ -318,8 +317,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
        inode->i_size = attr->size;
        inode->i_mtime.tv_sec  = attr->mtime;
        inode->i_mtime.tv_nsec = attr->mtimensec;
-       inode->i_ctime.tv_sec  = attr->ctime;
-       inode->i_ctime.tv_nsec = attr->ctimensec;
+       inode_set_ctime(inode, attr->ctime, attr->ctimensec);
        if (S_ISREG(inode->i_mode)) {
                fuse_init_common(inode);
                fuse_init_file_inode(inode, attr->flags);
@@ -1134,7 +1132,10 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
                process_init_limits(fc, arg);
 
                if (arg->minor >= 6) {
-                       u64 flags = arg->flags | (u64) arg->flags2 << 32;
+                       u64 flags = arg->flags;
+
+                       if (flags & FUSE_INIT_EXT)
+                               flags |= (u64) arg->flags2 << 32;
 
                        ra_pages = arg->max_readahead / PAGE_SIZE;
                        if (flags & FUSE_ASYNC_READ)
@@ -1254,7 +1255,8 @@ void fuse_send_init(struct fuse_mount *fm)
                FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
                FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
                FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
-               FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP;
+               FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
+               FUSE_HAS_EXPIRE_ONLY;
 #ifdef CONFIG_FUSE_DAX
        if (fm->fc->dax)
                flags |= FUSE_MAP_ALIGNMENT;
@@ -1397,16 +1399,18 @@ EXPORT_SYMBOL_GPL(fuse_dev_free);
 static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
                                      const struct fuse_inode *fi)
 {
+       struct timespec64 ctime = inode_get_ctime(&fi->inode);
+
        *attr = (struct fuse_attr){
                .ino            = fi->inode.i_ino,
                .size           = fi->inode.i_size,
                .blocks         = fi->inode.i_blocks,
                .atime          = fi->inode.i_atime.tv_sec,
                .mtime          = fi->inode.i_mtime.tv_sec,
-               .ctime          = fi->inode.i_ctime.tv_sec,
+               .ctime          = ctime.tv_sec,
                .atimensec      = fi->inode.i_atime.tv_nsec,
                .mtimensec      = fi->inode.i_mtime.tv_nsec,
-               .ctimensec      = fi->inode.i_ctime.tv_nsec,
+               .ctimensec      = ctime.tv_nsec,
                .mode           = fi->inode.i_mode,
                .nlink          = fi->inode.i_nlink,
                .uid            = fi->inode.i_uid.val,
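
Note the handshake tightening in process_init_reply() above: the upper 32 feature bits from flags2 are merged in only when the server set FUSE_INIT_EXT, and FUSE_HAS_EXPIRE_ONLY is newly advertised. A sketch of the gated 64-bit flag check (the bit position of FUSE_HAS_EXPIRE_ONLY is assumed from this release's uapi header):

	u64 flags = arg->flags;

	if (flags & FUSE_INIT_EXT)		/* extension negotiated? */
		flags |= (u64)arg->flags2 << 32;

	if (flags & FUSE_HAS_EXPIRE_ONLY) {	/* a flags2 (high-word) bit */
		/* server understands expire-only entry invalidation */
	}
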
index 8e01bfd..726640f 100644 (file)
@@ -9,14 +9,23 @@
 #include <linux/compat.h>
 #include <linux/fileattr.h>
 
-static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args)
+static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
+                              struct fuse_ioctl_out *outarg)
 {
-       ssize_t ret = fuse_simple_request(fm, args);
+       ssize_t ret;
+
+       args->out_args[0].size = sizeof(*outarg);
+       args->out_args[0].value = outarg;
+
+       ret = fuse_simple_request(fm, args);
 
        /* Translate ENOSYS, which shouldn't be returned from fs */
        if (ret == -ENOSYS)
                ret = -ENOTTY;
 
+       if (ret >= 0 && outarg->result == -ENOSYS)
+               outarg->result = -ENOTTY;
+
        return ret;
 }
 
@@ -264,13 +273,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
        }
 
        ap.args.out_numargs = 2;
-       ap.args.out_args[0].size = sizeof(outarg);
-       ap.args.out_args[0].value = &outarg;
        ap.args.out_args[1].size = out_size;
        ap.args.out_pages = true;
        ap.args.out_argvar = true;
 
-       transferred = fuse_send_ioctl(fm, &ap.args);
+       transferred = fuse_send_ioctl(fm, &ap.args, &outarg);
        err = transferred;
        if (transferred < 0)
                goto out;
@@ -399,12 +406,10 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
        args.in_args[1].size = inarg.in_size;
        args.in_args[1].value = ptr;
        args.out_numargs = 2;
-       args.out_args[0].size = sizeof(outarg);
-       args.out_args[0].value = &outarg;
        args.out_args[1].size = inarg.out_size;
        args.out_args[1].value = ptr;
 
-       err = fuse_send_ioctl(fm, &args);
+       err = fuse_send_ioctl(fm, &args, &outarg);
        if (!err) {
                if (outarg.result < 0)
                        err = outarg.result;
index a392aa0..443640e 100644 (file)
@@ -142,7 +142,7 @@ int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 
        ret = __gfs2_set_acl(inode, acl, type);
        if (!ret && mode != inode->i_mode) {
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                inode->i_mode = mode;
                mark_inode_dirty(inode);
        }
index ae49256..9c4b26a 100644 (file)
@@ -747,7 +747,7 @@ static const struct address_space_operations gfs2_aops = {
        .writepages = gfs2_writepages,
        .read_folio = gfs2_read_folio,
        .readahead = gfs2_readahead,
-       .dirty_folio = filemap_dirty_folio,
+       .dirty_folio = iomap_dirty_folio,
        .release_folio = iomap_release_folio,
        .invalidate_folio = iomap_invalidate_folio,
        .bmap = gfs2_bmap,
index 8d611fb..f62366b 100644 (file)
@@ -971,7 +971,7 @@ gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
        if (status)
                return ERR_PTR(status);
 
-       folio = iomap_get_folio(iter, pos);
+       folio = iomap_get_folio(iter, pos, len);
        if (IS_ERR(folio))
                gfs2_trans_end(sdp);
        return folio;
@@ -1386,7 +1386,7 @@ static int trunc_start(struct inode *inode, u64 newsize)
                ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
 
        i_size_write(inode, newsize);
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
        gfs2_dinode_out(ip, dibh->b_data);
 
        if (journaled)
@@ -1583,8 +1583,7 @@ out_unlock:
 
                        /* Every transaction boundary, we rewrite the dinode
                           to keep its di_blocks current in case of failure. */
-                       ip->i_inode.i_mtime = ip->i_inode.i_ctime =
-                               current_time(&ip->i_inode);
+                       ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
                        gfs2_trans_add_meta(ip->i_gl, dibh);
                        gfs2_dinode_out(ip, dibh->b_data);
                        brelse(dibh);
@@ -1950,7 +1949,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
                gfs2_statfs_change(sdp, 0, +btotal, 0);
                gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
                                  ip->i_inode.i_gid);
-               ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+               ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
                gfs2_trans_add_meta(ip->i_gl, dibh);
                gfs2_dinode_out(ip, dibh->b_data);
                up_write(&ip->i_rw_mutex);
@@ -1993,7 +1992,7 @@ static int trunc_end(struct gfs2_inode *ip)
                gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
                gfs2_ordered_del_inode(ip);
        }
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
        ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
 
        gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -2094,7 +2093,7 @@ static int do_grow(struct inode *inode, u64 size)
                goto do_end_trans;
 
        truncate_setsize(inode, size);
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
        gfs2_trans_add_meta(ip->i_gl, dibh);
        gfs2_dinode_out(ip, dibh->b_data);
        brelse(dibh);
index 54a6d17..1a2afa8 100644 (file)
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
        memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
        if (ip->i_inode.i_size < offset + size)
                i_size_write(&ip->i_inode, offset + size);
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
        gfs2_dinode_out(ip, dibh->b_data);
 
        brelse(dibh);
@@ -227,7 +227,7 @@ out:
 
        if (ip->i_inode.i_size < offset + copied)
                i_size_write(&ip->i_inode, offset + copied);
-       ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
 
        gfs2_trans_add_meta(ip->i_gl, dibh);
        gfs2_dinode_out(ip, dibh->b_data);
@@ -1814,7 +1814,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                        gfs2_inum_out(nip, dent);
                        dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
                        dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
-                       tv = current_time(&ip->i_inode);
+                       tv = inode_set_ctime_current(&ip->i_inode);
                        if (ip->i_diskflags & GFS2_DIF_EXHASH) {
                                leaf = (struct gfs2_leaf *)bh->b_data;
                                be16_add_cpu(&leaf->lf_entries, 1);
@@ -1825,7 +1825,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
                        da->bh = NULL;
                        brelse(bh);
                        ip->i_entries++;
-                       ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv;
+                       ip->i_inode.i_mtime = tv;
                        if (S_ISDIR(nip->i_inode.i_mode))
                                inc_nlink(&ip->i_inode);
                        mark_inode_dirty(inode);
@@ -1876,7 +1876,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
        const struct qstr *name = &dentry->d_name;
        struct gfs2_dirent *dent, *prev = NULL;
        struct buffer_head *bh;
-       struct timespec64 tv = current_time(&dip->i_inode);
+       struct timespec64 tv;
 
        /* Returns _either_ the entry (if its first in block) or the
           previous entry otherwise */
@@ -1896,6 +1896,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
        }
 
        dirent_del(dip, bh, prev, dent);
+       tv = inode_set_ctime_current(&dip->i_inode);
        if (dip->i_diskflags & GFS2_DIF_EXHASH) {
                struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
                u16 entries = be16_to_cpu(leaf->lf_entries);
@@ -1910,7 +1911,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
        if (!dip->i_entries)
                gfs2_consist_inode(dip);
        dip->i_entries--;
-       dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv;
+       dip->i_inode.i_mtime = tv;

        if (d_is_dir(dentry))
                drop_nlink(&dip->i_inode);
        mark_inode_dirty(&dip->i_inode);
@@ -1951,7 +1952,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
        dent->de_type = cpu_to_be16(new_type);
        brelse(bh);
 
-       dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
+       dip->i_inode.i_mtime = inode_set_ctime_current(&dip->i_inode);
        mark_inode_dirty_sync(&dip->i_inode);
        return 0;
 }
index 1bf3c44..766186c 100644 (file)
@@ -260,7 +260,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
        error = gfs2_meta_inode_buffer(ip, &bh);
        if (error)
                goto out_trans_end;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        gfs2_trans_add_meta(ip->i_gl, bh);
        ip->i_diskflags = new_flags;
        gfs2_dinode_out(ip, bh->b_data);
@@ -1578,7 +1578,7 @@ const struct file_operations gfs2_file_fops = {
        .fsync          = gfs2_fsync,
        .lock           = gfs2_lock,
        .flock          = gfs2_flock,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = copy_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = simple_nosetlease,
        .fallocate      = gfs2_fallocate,
@@ -1609,7 +1609,7 @@ const struct file_operations gfs2_file_fops_nolock = {
        .open           = gfs2_open,
        .release        = gfs2_release,
        .fsync          = gfs2_fsync,
-       .splice_read    = filemap_splice_read,
+       .splice_read    = copy_splice_read,
        .splice_write   = gfs2_file_splice_write,
        .setlease       = generic_setlease,
        .fallocate      = gfs2_fallocate,
index 5431932..aecdac3 100644 (file)
@@ -437,8 +437,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
                inode->i_atime = atime;
        inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
        inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
-       inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-       inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+       inode_set_ctime(inode, be64_to_cpu(str->di_ctime),
+                       be32_to_cpu(str->di_ctime_nsec));
 
        ip->i_goal = be64_to_cpu(str->di_goal_meta);
        ip->i_generation = be64_to_cpu(str->di_generation);
index 17c994a..a21ac41 100644 (file)
@@ -690,7 +690,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
        set_nlink(inode, S_ISDIR(mode) ? 2 : 1);
        inode->i_rdev = dev;
        inode->i_size = size;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        munge_mode_uid_gid(dip, inode);
        check_and_update_goal(dip);
        ip->i_goal = dip->i_goal;
@@ -1029,7 +1029,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
 
        gfs2_trans_add_meta(ip->i_gl, dibh);
        inc_nlink(&ip->i_inode);
-       ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       inode_set_ctime_current(&ip->i_inode);
        ihold(inode);
        d_instantiate(dentry, inode);
        mark_inode_dirty(inode);
@@ -1114,7 +1114,7 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
                return error;
 
        ip->i_entries = 0;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        if (S_ISDIR(inode->i_mode))
                clear_nlink(inode);
        else
@@ -1371,7 +1371,7 @@ static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip,
        if (dir_rename)
                return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
 
-       ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       inode_set_ctime_current(&ip->i_inode);
        mark_inode_dirty_sync(&ip->i_inode);
        return 0;
 }
@@ -2071,7 +2071,7 @@ static int gfs2_getattr(struct mnt_idmap *idmap,
                                  STATX_ATTR_IMMUTABLE |
                                  STATX_ATTR_NODUMP);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
        if (gfs2_holder_initialized(&gh))
                gfs2_glock_dq_uninit(&gh);
@@ -2139,8 +2139,7 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset)
        return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
 }
 
-static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
-                           int flags)
+static int gfs2_update_time(struct inode *inode, int flags)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_glock *gl = ip->i_gl;
@@ -2155,7 +2154,8 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
                if (error)
                        return error;
        }
-       return generic_update_time(inode, time, flags);
+       generic_update_time(inode, flags);
+       return 0;
 }
 
 static const struct inode_operations gfs2_file_iops = {
index 704192b..aa5fd06 100644 (file)
@@ -871,7 +871,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
                size = loc + sizeof(struct gfs2_quota);
                if (size > inode->i_size)
                        i_size_write(inode, size);
-               inode->i_mtime = inode->i_atime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                mark_inode_dirty(inode);
                set_bit(QDF_REFRESH, &qd->qd_flags);
        }
index 9f4d5d6..2f70133 100644 (file)
@@ -412,7 +412,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
        str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
        str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
        str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
-       str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
+       str->di_ctime = cpu_to_be64(inode_get_ctime(inode).tv_sec);
 
        str->di_goal_meta = cpu_to_be64(ip->i_goal);
        str->di_goal_data = cpu_to_be64(ip->i_goal);
@@ -429,7 +429,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
        str->di_eattr = cpu_to_be64(ip->i_eattr);
        str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
        str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
-       str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+       str->di_ctime_nsec = cpu_to_be32(inode_get_ctime(inode).tv_nsec);
 }
 
 /**
@@ -689,7 +689,7 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
        struct super_block *sb = sdp->sd_vfs;
        int error;
 
-       error = freeze_super(sb);
+       error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
        if (error)
                return error;
 
@@ -697,7 +697,9 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
                               GFS2_LFC_FREEZE_GO_SYNC);
                if (gfs2_withdrawn(sdp)) {
-                       thaw_super(sb);
+                       error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+                       if (error)
+                               return error;
                        return -EIO;
                }
        }
@@ -712,7 +714,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp)
        error = gfs2_freeze_lock_shared(sdp);
        if (error)
                goto fail;
-       error = thaw_super(sb);
+       error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
        if (!error)
                return 0;
 
@@ -761,7 +763,7 @@ out:
  *
  */
 
-static int gfs2_freeze_super(struct super_block *sb)
+static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error;
@@ -816,7 +818,7 @@ out:
  *
  */
 
-static int gfs2_thaw_super(struct super_block *sb)
+static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
        int error;
index 2dfbe2f..c60bc7f 100644 (file)
@@ -168,10 +168,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
 
        switch (n) {
        case 0:
-               error = thaw_super(sdp->sd_vfs);
+               error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
                break;
        case 1:
-               error = freeze_super(sdp->sd_vfs);
+               error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
                break;
        default:
                return -EINVAL;
index ec16312..7e835be 100644 (file)
@@ -230,9 +230,11 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
 {
 
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+       struct super_block *sb = sdp->sd_vfs;
        struct gfs2_bufdata *bd;
        struct gfs2_meta_header *mh;
        struct gfs2_trans *tr = current->journal_info;
+       bool withdraw = false;
 
        lock_buffer(bh);
        if (buffer_pinned(bh)) {
@@ -266,13 +268,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
                       (unsigned long long)bd->bd_bh->b_blocknr);
                BUG();
        }
-       if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) {
-               fs_info(sdp, "GFS2:adding buf while frozen\n");
-               gfs2_assert_withdraw(sdp, 0);
-       }
        if (unlikely(gfs2_withdrawn(sdp))) {
                fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
                        (unsigned long long)bd->bd_bh->b_blocknr);
+               goto out_unlock;
+       }
+       if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) {
+               fs_info(sdp, "GFS2:adding buf while frozen\n");
+               withdraw = true;
+               goto out_unlock;
        }
        gfs2_pin(sdp, bd->bd_bh);
        mh->__pad0 = cpu_to_be64(0);
@@ -281,6 +285,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
        tr->tr_num_buf_new++;
 out_unlock:
        gfs2_log_unlock(sdp);
+       if (withdraw)
+               gfs2_assert_withdraw(sdp, 0);
 out:
        unlock_buffer(bh);
 }
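
The gfs2_trans_add_meta() hunk above is a lock-ordering fix: gfs2_assert_withdraw() (which can initiate a withdraw) is no longer called while the log lock is held; the frozen case just records withdraw = true, takes the unlock path, and asserts afterwards. The underlying pattern, sketched generically (names are illustrative, not gfs2 API):

	bool defer_action = false;

	spin_lock(&lk);
	if (bad_state)
		defer_action = true;	/* too heavy to run under lk */
	spin_unlock(&lk);

	if (defer_action)
		handle_bad_state();	/* safe: lk already dropped */
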
index 93b36d0..4fea70c 100644 (file)
@@ -311,7 +311,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
                ea->ea_num_ptrs = 0;
        }
 
-       ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       inode_set_ctime_current(&ip->i_inode);
        __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
        gfs2_trans_end(sdp);
@@ -763,7 +763,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
        if (error)
                goto out_end_trans;
 
-       ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       inode_set_ctime_current(&ip->i_inode);
        __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
 out_end_trans:
@@ -888,7 +888,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
        if (es->es_el)
                ea_set_remove_stuffed(ip, es->es_el);
 
-       ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       inode_set_ctime_current(&ip->i_inode);
        __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
        gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1106,7 +1106,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
                ea->ea_type = GFS2_EATYPE_UNUSED;
        }
 
-       ip->i_inode.i_ctime = current_time(&ip->i_inode);
+       inode_set_ctime_current(&ip->i_inode);
        __mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
 
        gfs2_trans_end(GFS2_SB(&ip->i_inode));
index d365bf0..632c226 100644 (file)
@@ -133,7 +133,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct i
                goto err1;
 
        dir->i_size++;
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        hfs_find_exit(&fd);
        return 0;
@@ -269,7 +269,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, const struct qstr *str)
        }
 
        dir->i_size--;
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        res = 0;
 out:
@@ -337,7 +337,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
        if (err)
                goto out;
        dst_dir->i_size++;
-       dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir);
+       dst_dir->i_mtime = inode_set_ctime_current(dst_dir);
        mark_inode_dirty(dst_dir);
 
        /* finally remove the old entry */
@@ -349,7 +349,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
        if (err)
                goto out;
        src_dir->i_size--;
-       src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir);
+       src_dir->i_mtime = inode_set_ctime_current(src_dir);
        mark_inode_dirty(src_dir);
 
        type = entry.type;
index 3e1e3dc..b75c260 100644 (file)
@@ -263,7 +263,7 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
        if (res)
                return res;
        clear_nlink(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        hfs_delete_inode(inode);
        mark_inode_dirty(inode);
        return 0;
index 441d7fc..ee349b7 100644 (file)
@@ -200,7 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
        set_nlink(inode, 1);
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        HFS_I(inode)->flags = 0;
        HFS_I(inode)->rsrc_inode = NULL;
        HFS_I(inode)->fs_blocks = 0;
@@ -355,8 +355,8 @@ static int hfs_read_inode(struct inode *inode, void *data)
                        inode->i_mode |= S_IWUGO;
                inode->i_mode &= ~hsb->s_file_umask;
                inode->i_mode |= S_IFREG;
-               inode->i_ctime = inode->i_atime = inode->i_mtime =
-                               hfs_m_to_utime(rec->file.MdDat);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode,
+                                                                       hfs_m_to_utime(rec->file.MdDat));
                inode->i_op = &hfs_file_inode_operations;
                inode->i_fop = &hfs_file_operations;
                inode->i_mapping->a_ops = &hfs_aops;
@@ -366,8 +366,8 @@ static int hfs_read_inode(struct inode *inode, void *data)
                inode->i_size = be16_to_cpu(rec->dir.Val) + 2;
                HFS_I(inode)->fs_blocks = 0;
                inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask);
-               inode->i_ctime = inode->i_atime = inode->i_mtime =
-                               hfs_m_to_utime(rec->dir.MdDat);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode,
+                                                                       hfs_m_to_utime(rec->dir.MdDat));
                inode->i_op = &hfs_dir_inode_operations;
                inode->i_fop = &hfs_dir_operations;
                break;
@@ -654,8 +654,7 @@ int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 
                truncate_setsize(inode, attr->ia_size);
                hfs_file_truncate(inode);
-               inode->i_atime = inode->i_mtime = inode->i_ctime =
-                                                 current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        }
 
        setattr_copy(&nop_mnt_idmap, inode, attr);
index 2875961..dc27d41 100644 (file)
@@ -28,7 +28,9 @@ static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
        /* fix up inode on a timezone change */
        diff = sys_tz.tz_minuteswest * 60 - HFS_I(inode)->tz_secondswest;
        if (diff) {
-               inode->i_ctime.tv_sec += diff;
+               struct timespec64 ctime = inode_get_ctime(inode);
+
+               inode_set_ctime(inode, ctime.tv_sec + diff, ctime.tv_nsec);
                inode->i_atime.tv_sec += diff;
                inode->i_mtime.tv_sec += diff;
                HFS_I(inode)->tz_secondswest += diff;
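
For call sites that adjusted i_ctime fields in place, such as the timezone fixup above, the conversion goes through a local timespec64 instead: read the ctime with inode_get_ctime(), adjust it, and store it back with inode_set_ctime(). A sketch of that read-modify-write shape (shift_ctime() is a hypothetical name):

	#include <linux/fs.h>

	/* Hypothetical helper mirroring the hfs_revalidate_dentry() fixup. */
	static void shift_ctime(struct inode *inode, time64_t diff)
	{
		struct timespec64 ctime = inode_get_ctime(inode);

		/* Shift the seconds part; the nanoseconds are preserved. */
		inode_set_ctime(inode, ctime.tv_sec + diff, ctime.tv_nsec);
	}
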
index 35472cb..e71ae25 100644 (file)
@@ -312,7 +312,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
        dir->i_size++;
        if (S_ISDIR(inode->i_mode))
                hfsplus_subfolders_inc(dir);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
 
        hfs_find_exit(&fd);
@@ -417,7 +417,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, const struct qstr *str)
        dir->i_size--;
        if (type == HFSPLUS_FOLDER)
                hfsplus_subfolders_dec(dir);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
 
        if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) {
@@ -494,7 +494,7 @@ int hfsplus_rename_cat(u32 cnid,
        dst_dir->i_size++;
        if (type == HFSPLUS_FOLDER)
                hfsplus_subfolders_inc(dst_dir);
-       dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir);
+       dst_dir->i_mtime = inode_set_ctime_current(dst_dir);
 
        /* finally remove the old entry */
        err = hfsplus_cat_build_key(sb, src_fd.search_key,
@@ -511,7 +511,7 @@ int hfsplus_rename_cat(u32 cnid,
        src_dir->i_size--;
        if (type == HFSPLUS_FOLDER)
                hfsplus_subfolders_dec(src_dir);
-       src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir);
+       src_dir->i_mtime = inode_set_ctime_current(src_dir);
 
        /* remove old thread entry */
        hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
index 56fb5f1..f5c4b3e 100644 (file)
@@ -346,7 +346,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
        inc_nlink(inode);
        hfsplus_instantiate(dst_dentry, inode, cnid);
        ihold(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        sbi->file_count++;
        hfsplus_mark_mdb_dirty(dst_dir->i_sb);
@@ -405,7 +405,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
                        hfsplus_delete_inode(inode);
        } else
                sbi->file_count--;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 out:
        mutex_unlock(&sbi->vh_mutex);
@@ -426,7 +426,7 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
        if (res)
                goto out;
        clear_nlink(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        hfsplus_delete_inode(inode);
        mark_inode_dirty(inode);
 out:
index 7d1a675..c65c8c4 100644 (file)
@@ -267,7 +267,7 @@ static int hfsplus_setattr(struct mnt_idmap *idmap,
                }
                truncate_setsize(inode, attr->ia_size);
                hfsplus_file_truncate(inode);
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
        }
 
        setattr_copy(&nop_mnt_idmap, inode, attr);
@@ -298,7 +298,7 @@ int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path,
        stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
                                 STATX_ATTR_NODUMP;
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        return 0;
 }
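
The hunk above also picks up the new generic_fillattr() signature, which threads the caller's request_mask through so attributes the caller never asked for need not be filled in. A sketch of a minimal ->getattr under the new signature, with fs_getattr() as a hypothetical filesystem method:

	#include <linux/fs.h>
	#include <linux/stat.h>

	/* Hypothetical ->getattr using the new generic_fillattr() signature. */
	static int fs_getattr(struct mnt_idmap *idmap, const struct path *path,
			      struct kstat *stat, u32 request_mask,
			      unsigned int query_flags)
	{
		struct inode *inode = d_inode(path->dentry);

		generic_fillattr(idmap, request_mask, inode, stat);
		return 0;
	}
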
 
@@ -392,7 +392,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
        inode->i_ino = sbi->next_cnid++;
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
        set_nlink(inode, 1);
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
        hip = HFSPLUS_I(inode);
        INIT_LIST_HEAD(&hip->open_dir_list);
@@ -523,7 +523,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
                inode->i_size = 2 + be32_to_cpu(folder->valence);
                inode->i_atime = hfsp_mt2ut(folder->access_date);
                inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
-               inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
+               inode_set_ctime_to_ts(inode,
+                                     hfsp_mt2ut(folder->attribute_mod_date));
                HFSPLUS_I(inode)->create_date = folder->create_date;
                HFSPLUS_I(inode)->fs_blocks = 0;
                if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
@@ -564,7 +565,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
                }
                inode->i_atime = hfsp_mt2ut(file->access_date);
                inode->i_mtime = hfsp_mt2ut(file->content_mod_date);
-               inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date);
+               inode_set_ctime_to_ts(inode,
+                                     hfsp_mt2ut(file->attribute_mod_date));
                HFSPLUS_I(inode)->create_date = file->create_date;
        } else {
                pr_err("bad catalog entry used to create inode\n");
@@ -609,7 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
                hfsplus_cat_set_perms(inode, &folder->permissions);
                folder->access_date = hfsp_ut2mt(inode->i_atime);
                folder->content_mod_date = hfsp_ut2mt(inode->i_mtime);
-               folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
+               folder->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode));
                folder->valence = cpu_to_be32(inode->i_size - 2);
                if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
                        folder->subfolders =
@@ -644,7 +646,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
                        file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED);
                file->access_date = hfsp_ut2mt(inode->i_atime);
                file->content_mod_date = hfsp_ut2mt(inode->i_mtime);
-               file->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
+               file->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode));
                hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
                                         sizeof(struct hfsplus_cat_file));
        }
@@ -700,7 +702,7 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap,
        else
                hip->userflags &= ~HFSPLUS_FLG_NODUMP;
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        return 0;
index 4638709..dc5a5ce 100644 (file)
@@ -517,8 +517,7 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
                (struct timespec64){ st->atime.tv_sec, st->atime.tv_nsec };
        ino->i_mtime =
                (struct timespec64){ st->mtime.tv_sec, st->mtime.tv_nsec };
-       ino->i_ctime =
-               (struct timespec64){ st->ctime.tv_sec, st->ctime.tv_nsec };
+       inode_set_ctime(ino, st->ctime.tv_sec, st->ctime.tv_nsec);
        ino->i_size = st->size;
        ino->i_blocks = st->blocks;
        return 0;
index f32f156..f36566d 100644 (file)
@@ -277,10 +277,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
         * inode.
         */
 
-       if (!result->i_ctime.tv_sec) {
-               if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
-                       result->i_ctime.tv_sec = 1;
-               result->i_ctime.tv_nsec = 0;
+       if (!inode_get_ctime(result).tv_sec) {
+               time64_t csec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date));
+
+               inode_set_ctime(result, csec ? csec : 1, 0);
                result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
                result->i_mtime.tv_nsec = 0;
                result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
index e50e92a..4791663 100644 (file)
@@ -36,7 +36,7 @@ void hpfs_init_inode(struct inode *i)
        hpfs_inode->i_rddir_off = NULL;
        hpfs_inode->i_dirty = 0;
 
-       i->i_ctime.tv_sec = i->i_ctime.tv_nsec = 0;
+       inode_set_ctime(i, 0, 0);
        i->i_mtime.tv_sec = i->i_mtime.tv_nsec = 0;
        i->i_atime.tv_sec = i->i_atime.tv_nsec = 0;
 }
@@ -232,7 +232,7 @@ void hpfs_write_inode_nolock(struct inode *i)
        if (de) {
                de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
                de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
-               de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
+               de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec));
                de->read_only = !(i->i_mode & 0222);
                de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
                hpfs_mark_4buffers_dirty(&qbh);
@@ -242,7 +242,7 @@ void hpfs_write_inode_nolock(struct inode *i)
                if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
                        de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
                        de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
-                       de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
+                       de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec));
                        de->read_only = !(i->i_mode & 0222);
                        de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
                        de->file_size = cpu_to_le32(0);
index 69fb40b..f4eb8d6 100644 (file)
@@ -13,10 +13,9 @@ static void hpfs_update_directory_times(struct inode *dir)
 {
        time64_t t = local_to_gmt(dir->i_sb, local_get_seconds(dir->i_sb));
        if (t == dir->i_mtime.tv_sec &&
-           t == dir->i_ctime.tv_sec)
+           t == inode_get_ctime(dir).tv_sec)
                return;
-       dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t;
-       dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0;
+       dir->i_mtime = inode_set_ctime(dir, t, 0);
        hpfs_write_inode_nolock(dir);
 }
 
@@ -59,10 +58,8 @@ static int hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
        result->i_ino = fno;
        hpfs_i(result)->i_parent_dir = dir->i_ino;
        hpfs_i(result)->i_dno = dno;
-       result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-       result->i_ctime.tv_nsec = 0; 
-       result->i_mtime.tv_nsec = 0; 
-       result->i_atime.tv_nsec = 0; 
+       result->i_mtime = result->i_atime =
+               inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
        hpfs_i(result)->i_ea_size = 0;
        result->i_mode |= S_IFDIR;
        result->i_op = &hpfs_dir_iops;
@@ -167,10 +164,8 @@ static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir,
        result->i_fop = &hpfs_file_ops;
        set_nlink(result, 1);
        hpfs_i(result)->i_parent_dir = dir->i_ino;
-       result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-       result->i_ctime.tv_nsec = 0;
-       result->i_mtime.tv_nsec = 0;
-       result->i_atime.tv_nsec = 0;
+       result->i_mtime = result->i_atime =
+               inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
        hpfs_i(result)->i_ea_size = 0;
        if (dee.read_only)
                result->i_mode &= ~0222;
@@ -250,10 +245,8 @@ static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
        hpfs_init_inode(result);
        result->i_ino = fno;
        hpfs_i(result)->i_parent_dir = dir->i_ino;
-       result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-       result->i_ctime.tv_nsec = 0;
-       result->i_mtime.tv_nsec = 0;
-       result->i_atime.tv_nsec = 0;
+       result->i_mtime = result->i_atime =
+               inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
        hpfs_i(result)->i_ea_size = 0;
        result->i_uid = current_fsuid();
        result->i_gid = current_fsgid();
@@ -326,10 +319,8 @@ static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
        result->i_ino = fno;
        hpfs_init_inode(result);
        hpfs_i(result)->i_parent_dir = dir->i_ino;
-       result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
-       result->i_ctime.tv_nsec = 0;
-       result->i_mtime.tv_nsec = 0;
-       result->i_atime.tv_nsec = 0;
+       result->i_mtime = result->i_atime =
+               inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
        hpfs_i(result)->i_ea_size = 0;
        result->i_mode = S_IFLNK | 0777;
        result->i_uid = current_fsuid();
index 1cb8959..758a515 100644 (file)
@@ -729,8 +729,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
                root->i_atime.tv_nsec = 0;
                root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
                root->i_mtime.tv_nsec = 0;
-               root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
-               root->i_ctime.tv_nsec = 0;
+               inode_set_ctime(root,
+                               local_to_gmt(s, le32_to_cpu(de->creation_date)),
+                               0);
                hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size);
                hpfs_i(root)->i_parent_dir = root->i_ino;
                if (root->i_size == -1)
index 7b17ccf..93d3bcf 100644 (file)
@@ -887,7 +887,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 
        if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
                i_size_write(inode, offset + len);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
 out:
        inode_unlock(inode);
        return error;
@@ -935,7 +935,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
                inode->i_mode = S_IFDIR | ctx->mode;
                inode->i_uid = ctx->uid;
                inode->i_gid = ctx->gid;
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inode->i_op = &hugetlbfs_dir_inode_operations;
                inode->i_fop = &simple_dir_operations;
                /* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -979,7 +979,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
                                &hugetlbfs_i_mmap_rwsem_key);
                inode->i_mapping->a_ops = &hugetlbfs_aops;
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inode->i_mapping->private_data = resv_map;
                info->seals = F_SEAL_SEAL;
                switch (mode & S_IFMT) {
@@ -1022,7 +1022,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
        inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
        if (!inode)
                return -ENOSPC;
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        d_instantiate(dentry, inode);
        dget(dentry);/* Extra count - pin the dentry in core */
        return 0;
@@ -1054,7 +1054,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
        inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
        if (!inode)
                return -ENOSPC;
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        d_tmpfile(file, inode);
        return finish_open_simple(file, 0);
 }
@@ -1076,7 +1076,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
                } else
                        iput(inode);
        }
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        return error;
 }
index 8fefb69..35fd688 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/posix_acl.h>
-#include <linux/prefetch.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
 #include <linux/list_lru.h>
@@ -752,16 +751,11 @@ EXPORT_SYMBOL_GPL(evict_inodes);
 /**
  * invalidate_inodes   - attempt to free all inodes on a superblock
  * @sb:                superblock to operate on
- * @kill_dirty: flag to guide handling of dirty inodes
  *
- * Attempts to free all inodes for a given superblock.  If there were any
- * busy inodes return a non-zero value, else zero.
- * If @kill_dirty is set, discard dirty inodes too, otherwise treat
- * them as busy.
+ * Attempts to free all inodes (including dirty inodes) for a given superblock.
  */
-int invalidate_inodes(struct super_block *sb, bool kill_dirty)
+void invalidate_inodes(struct super_block *sb)
 {
-       int busy = 0;
        struct inode *inode, *next;
        LIST_HEAD(dispose);
 
@@ -773,14 +767,8 @@ again:
                        spin_unlock(&inode->i_lock);
                        continue;
                }
-               if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
-                       spin_unlock(&inode->i_lock);
-                       busy = 1;
-                       continue;
-               }
                if (atomic_read(&inode->i_count)) {
                        spin_unlock(&inode->i_lock);
-                       busy = 1;
                        continue;
                }
 
@@ -798,8 +786,6 @@ again:
        spin_unlock(&sb->s_inode_list_lock);
 
        dispose_list(&dispose);
-
-       return busy;
 }
 
 /*
@@ -1041,8 +1027,6 @@ struct inode *new_inode(struct super_block *sb)
 {
        struct inode *inode;
 
-       spin_lock_prefetch(&sb->s_inode_list_lock);
-
        inode = new_inode_pseudo(sb);
        if (inode)
                inode_sb_list_add(inode);
@@ -1853,6 +1837,7 @@ EXPORT_SYMBOL(bmap);
 static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
                             struct timespec64 now)
 {
+       struct timespec64 ctime;
 
        if (!(mnt->mnt_flags & MNT_RELATIME))
                return 1;
@@ -1864,7 +1849,8 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
        /*
         * Is ctime younger than or equal to atime? If yes, update atime:
         */
-       if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+       ctime = inode_get_ctime(inode);
+       if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
                return 1;
 
        /*
@@ -1879,29 +1865,76 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
        return 0;
 }
 
-int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
+/**
+ * inode_update_timestamps - update the timestamps on the inode
+ * @inode: inode to be updated
+ * @flags: S_* flags that need to be updated
+ *
+ * The update_time function is called when an inode's timestamps need to be
+ * updated for a read or write operation. This function handles updating the
+ * actual timestamps. It's up to the caller to ensure that the inode is marked
+ * dirty appropriately.
+ *
+ * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
+ * attempt to update all three of them. S_ATIME updates can be handled
+ * independently of the rest.
+ *
+ * Returns a set of S_* flags indicating which values changed.
+ */
+int inode_update_timestamps(struct inode *inode, int flags)
 {
-       int dirty_flags = 0;
+       int updated = 0;
+       struct timespec64 now;
 
-       if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
-               if (flags & S_ATIME)
-                       inode->i_atime = *time;
-               if (flags & S_CTIME)
-                       inode->i_ctime = *time;
-               if (flags & S_MTIME)
-                       inode->i_mtime = *time;
-
-               if (inode->i_sb->s_flags & SB_LAZYTIME)
-                       dirty_flags |= I_DIRTY_TIME;
-               else
-                       dirty_flags |= I_DIRTY_SYNC;
+       if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
+               struct timespec64 ctime = inode_get_ctime(inode);
+
+               now = inode_set_ctime_current(inode);
+               if (!timespec64_equal(&now, &ctime))
+                       updated |= S_CTIME;
+               if (!timespec64_equal(&now, &inode->i_mtime)) {
+                       inode->i_mtime = now;
+                       updated |= S_MTIME;
+               }
+               if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
+                       updated |= S_VERSION;
+       } else {
+               now = current_time(inode);
        }
 
-       if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
-               dirty_flags |= I_DIRTY_SYNC;
+       if (flags & S_ATIME) {
+               if (!timespec64_equal(&now, &inode->i_atime)) {
+                       inode->i_atime = now;
+                       updated |= S_ATIME;
+               }
+       }
+       return updated;
+}
+EXPORT_SYMBOL(inode_update_timestamps);
 
+/**
+ * generic_update_time - update the timestamps on the inode
+ * @inode: inode to be updated
+ * @flags: S_* flags that need to be updated
+ *
+ * The update_time function is called when an inode's timestamps need to be
+ * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
+ * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
+ * updates can be handled independently of the rest.
+ *
+ * Returns an S_* mask indicating which fields were updated.
+ */
+int generic_update_time(struct inode *inode, int flags)
+{
+       int updated = inode_update_timestamps(inode, flags);
+       int dirty_flags = 0;
+
+       if (updated & (S_ATIME|S_MTIME|S_CTIME))
+               dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
+       if (updated & S_VERSION)
+               dirty_flags |= I_DIRTY_SYNC;
        __mark_inode_dirty(inode, dirty_flags);
-       return 0;
+       return updated;
 }
 EXPORT_SYMBOL(generic_update_time);
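
With the timestamp handling split out, a filesystem that implements its own ->update_time no longer receives a caller-supplied timespec64; it calls inode_update_timestamps() and reacts to the returned S_* mask. A sketch under the new int-flags prototype (fs_update_time() is hypothetical; a real implementation would typically start a transaction before dirtying):

	#include <linux/fs.h>

	/* Hypothetical ->update_time built on the new helper above. */
	static int fs_update_time(struct inode *inode, int flags)
	{
		int updated = inode_update_timestamps(inode, flags);

		/* Dirty the inode only if some timestamp actually changed. */
		if (updated)
			mark_inode_dirty_sync(inode);
		return 0;
	}
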
 
@@ -1909,11 +1942,12 @@ EXPORT_SYMBOL(generic_update_time);
  * This does the actual work of updating an inodes time or version.  Must have
  * had called mnt_want_write() before calling this.
  */
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
+int inode_update_time(struct inode *inode, int flags)
 {
        if (inode->i_op->update_time)
-               return inode->i_op->update_time(inode, time, flags);
-       return generic_update_time(inode, time, flags);
+               return inode->i_op->update_time(inode, flags);
+       generic_update_time(inode, flags);
+       return 0;
 }
 EXPORT_SYMBOL(inode_update_time);
 
@@ -1965,7 +1999,6 @@ void touch_atime(const struct path *path)
 {
        struct vfsmount *mnt = path->mnt;
        struct inode *inode = d_inode(path->dentry);
-       struct timespec64 now;
 
        if (!atime_needs_update(path, inode))
                return;
@@ -1984,8 +2017,7 @@ void touch_atime(const struct path *path)
         * We may also fail on filesystems that have the ability to make parts
         * of the fs read only, e.g. subvolumes in Btrfs.
         */
-       now = current_time(inode);
-       inode_update_time(inode, &now, S_ATIME);
+       inode_update_time(inode, S_ATIME);
        __mnt_drop_write(mnt);
 skip_update:
        sb_end_write(inode->i_sb);
@@ -2070,18 +2102,63 @@ int file_remove_privs(struct file *file)
 }
 EXPORT_SYMBOL(file_remove_privs);
 
-static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
+/**
+ * current_mgtime - Return FS time (possibly fine-grained)
+ * @inode: inode.
+ *
+ * Return the current time truncated to the time granularity supported by
+ * the fs, as suitable for a ctime/mtime change. If the ctime is flagged
+ * as having been QUERIED, get a fine-grained timestamp.
+ */
+struct timespec64 current_mgtime(struct inode *inode)
+{
+       struct timespec64 now, ctime;
+       atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec;
+       long nsec = atomic_long_read(pnsec);
+
+       if (nsec & I_CTIME_QUERIED) {
+               ktime_get_real_ts64(&now);
+               return timestamp_truncate(now, inode);
+       }
+
+       ktime_get_coarse_real_ts64(&now);
+       now = timestamp_truncate(now, inode);
+
+       /*
+        * If we've recently fetched a fine-grained timestamp
+        * then the coarse-grained one may still be earlier than the
+        * existing ctime. Just keep the existing value if so.
+        */
+       ctime = inode_get_ctime(inode);
+       if (timespec64_compare(&ctime, &now) > 0)
+               now = ctime;
+
+       return now;
+}
+EXPORT_SYMBOL(current_mgtime);
+
+static struct timespec64 current_ctime(struct inode *inode)
+{
+       if (is_mgtime(inode))
+               return current_mgtime(inode);
+       return current_time(inode);
+}
+
+static int inode_needs_update_time(struct inode *inode)
 {
        int sync_it = 0;
+       struct timespec64 now = current_ctime(inode);
+       struct timespec64 ctime;
 
        /* First try to exhaust all avenues to not sync */
        if (IS_NOCMTIME(inode))
                return 0;
 
-       if (!timespec64_equal(&inode->i_mtime, now))
+       if (!timespec64_equal(&inode->i_mtime, &now))
                sync_it = S_MTIME;
 
-       if (!timespec64_equal(&inode->i_ctime, now))
+       ctime = inode_get_ctime(inode);
+       if (!timespec64_equal(&ctime, &now))
                sync_it |= S_CTIME;
 
        if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
@@ -2090,15 +2167,14 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
        return sync_it;
 }
 
-static int __file_update_time(struct file *file, struct timespec64 *now,
-                       int sync_mode)
+static int __file_update_time(struct file *file, int sync_mode)
 {
        int ret = 0;
        struct inode *inode = file_inode(file);
 
        /* try to update time settings */
        if (!__mnt_want_write_file(file)) {
-               ret = inode_update_time(inode, now, sync_mode);
+               ret = inode_update_time(inode, sync_mode);
                __mnt_drop_write_file(file);
        }
 
@@ -2123,13 +2199,12 @@ int file_update_time(struct file *file)
 {
        int ret;
        struct inode *inode = file_inode(file);
-       struct timespec64 now = current_time(inode);
 
-       ret = inode_needs_update_time(inode, &now);
+       ret = inode_needs_update_time(inode);
        if (ret <= 0)
                return ret;
 
-       return __file_update_time(file, &now, ret);
+       return __file_update_time(file, ret);
 }
 EXPORT_SYMBOL(file_update_time);
 
@@ -2152,7 +2227,6 @@ static int file_modified_flags(struct file *file, int flags)
 {
        int ret;
        struct inode *inode = file_inode(file);
-       struct timespec64 now = current_time(inode);
 
        /*
         * Clear the security bits if the process is not being run by root.
@@ -2165,13 +2239,13 @@ static int file_modified_flags(struct file *file, int flags)
        if (unlikely(file->f_mode & FMODE_NOCMTIME))
                return 0;
 
-       ret = inode_needs_update_time(inode, &now);
+       ret = inode_needs_update_time(inode);
        if (ret <= 0)
                return ret;
        if (flags & IOCB_NOWAIT)
                return -EAGAIN;
 
-       return __file_update_time(file, &now, ret);
+       return __file_update_time(file, ret);
 }
 
 /**
@@ -2491,15 +2565,59 @@ struct timespec64 current_time(struct inode *inode)
        struct timespec64 now;
 
        ktime_get_coarse_real_ts64(&now);
+       return timestamp_truncate(now, inode);
+}
+EXPORT_SYMBOL(current_time);
 
-       if (unlikely(!inode->i_sb)) {
-               WARN(1, "current_time() called with uninitialized super_block in the inode");
+/**
+ * inode_set_ctime_current - set the ctime to current_time
+ * @inode: inode
+ *
+ * Set the inode->i_ctime to the current value for the inode. Returns
+ * the current value that was assigned to i_ctime.
+ */
+struct timespec64 inode_set_ctime_current(struct inode *inode)
+{
+       struct timespec64 now;
+       struct timespec64 ctime;
+
+       ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec);
+       if (!(ctime.tv_nsec & I_CTIME_QUERIED)) {
+               now = current_time(inode);
+
+               /* Just copy it into place if it's not multigrain */
+               if (!is_mgtime(inode)) {
+                       inode_set_ctime_to_ts(inode, now);
+                       return now;
+               }
+
+               /*
+                * If we've recently updated with a fine-grained timestamp,
+                * then the coarse-grained one may still be earlier than the
+                * existing ctime. Just keep the existing value if so.
+                */
+               ctime.tv_sec = inode->__i_ctime.tv_sec;
+               if (timespec64_compare(&ctime, &now) > 0)
+                       return ctime;
+
+               /*
+                * Ctime updates are usually protected by the inode_lock, but
+                * we can still race with someone setting the QUERIED flag.
+                * Try to swap the new nsec value into place. If it's changed
+                * in the interim, then just go with a fine-grained timestamp.
+                */
+               if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec,
+                           now.tv_nsec) != ctime.tv_nsec)
+                       goto fine_grained;
+               inode->__i_ctime.tv_sec = now.tv_sec;
                return now;
        }
-
-       return timestamp_truncate(now, inode);
+fine_grained:
+       ktime_get_real_ts64(&now);
+       inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode));
+       return now;
 }
-EXPORT_SYMBOL(current_time);
+EXPORT_SYMBOL(inode_set_ctime_current);
 
 /**
  * in_group_or_capable - check whether caller is CAP_FSETID privileged
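
The multigrain logic in inode_set_ctime_current() above hinges on one invariant: a coarse-grained "now" may lag a previously handed-out fine-grained stamp, and the ctime must never move backwards. A self-contained user-space model of just that comparison (pick_ctime() is purely illustrative, not kernel API):

	#include <stdbool.h>
	#include <time.h>

	/* Illustrative model only: never let a coarse "now" rewind the ctime. */
	static struct timespec pick_ctime(struct timespec stored,
					  struct timespec coarse_now)
	{
		bool stored_newer = stored.tv_sec > coarse_now.tv_sec ||
				    (stored.tv_sec == coarse_now.tv_sec &&
				     stored.tv_nsec > coarse_now.tv_nsec);

		/* Keep the existing ctime if the coarse clock lags it. */
		return stored_newer ? stored : coarse_now;
	}
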
index f7a3dc1..74d3b16 100644 (file)
@@ -115,7 +115,7 @@ static inline void put_file_access(struct file *file)
  * super.c
  */
 extern int reconfigure_super(struct fs_context *);
-extern bool trylock_super(struct super_block *sb);
+extern bool super_trylock_shared(struct super_block *sb);
 struct super_block *user_get_super(dev_t, bool excl);
 void put_super(struct super_block *sb);
 extern bool mount_capable(struct fs_context *);
@@ -201,7 +201,7 @@ void lock_two_inodes(struct inode *inode1, struct inode *inode2,
  * fs-writeback.c
  */
 extern long get_nr_dirty_inodes(void);
-extern int invalidate_inodes(struct super_block *, bool);
+void invalidate_inodes(struct super_block *sb);
 
 /*
  * dcache.c
index 5b2481c..f5fd99d 100644 (file)
@@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
  * Returns 0 on success, -errno on error, 1 if this was the last
  * extent that will fit in user array.
  */
-#define SET_UNKNOWN_FLAGS      (FIEMAP_EXTENT_DELALLOC)
-#define SET_NO_UNMOUNTED_IO_FLAGS      (FIEMAP_EXTENT_DATA_ENCRYPTED)
-#define SET_NOT_ALIGNED_FLAGS  (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
 int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
                            u64 phys, u64 len, u32 flags)
 {
@@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
        if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
                return 1;
 
+#define SET_UNKNOWN_FLAGS      (FIEMAP_EXTENT_DELALLOC)
+#define SET_NO_UNMOUNTED_IO_FLAGS      (FIEMAP_EXTENT_DATA_ENCRYPTED)
+#define SET_NOT_ALIGNED_FLAGS  (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
+
        if (flags & SET_UNKNOWN_FLAGS)
                flags |= FIEMAP_EXTENT_UNKNOWN;
        if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
@@ -396,8 +397,8 @@ static int ioctl_fsfreeze(struct file *filp)
 
        /* Freeze */
        if (sb->s_op->freeze_super)
-               return sb->s_op->freeze_super(sb);
-       return freeze_super(sb);
+               return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+       return freeze_super(sb, FREEZE_HOLDER_USERSPACE);
 }
 
 static int ioctl_fsthaw(struct file *filp)
@@ -409,8 +410,8 @@ static int ioctl_fsthaw(struct file *filp)
 
        /* Thaw */
        if (sb->s_op->thaw_super)
-               return sb->s_op->thaw_super(sb);
-       return thaw_super(sb);
+               return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+       return thaw_super(sb, FREEZE_HOLDER_USERSPACE);
 }
 
 static int ioctl_file_dedupe_range(struct file *file,
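
The FREEZE_HOLDER_USERSPACE argument above comes from the new freeze-holder API, under which userspace freezes (FIFREEZE/FITHAW) and kernel-internal freezes are tracked separately so one cannot thaw the other's freeze. A sketch of a kernel-internal user, assuming the companion FREEZE_HOLDER_KERNEL flag from the same series (with_fs_frozen() is hypothetical):

	#include <linux/fs.h>

	/* Hypothetical kernel-internal freeze/thaw pairing. */
	static int with_fs_frozen(struct super_block *sb)
	{
		int err = freeze_super(sb, FREEZE_HOLDER_KERNEL);

		if (err)
			return err;
		/* ... operate on the quiesced filesystem ... */
		return thaw_super(sb, FREEZE_HOLDER_KERNEL);
	}
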
@@ -877,6 +878,9 @@ out:
 #ifdef CONFIG_COMPAT
 /**
  * compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
+ * @file: The file to operate on.
+ * @cmd: The ioctl command number.
+ * @arg: The argument to the ioctl.
  *
  * This is not normally called as a function, but instead set in struct
  * file_operations as
index adb92cd..283fb96 100644 (file)
 
 #define IOEND_BATCH_SIZE       4096
 
+typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
 /*
- * Structure allocated for each folio when block size < folio size
- * to track sub-folio uptodate status and I/O completions.
+ * Structure allocated for each folio to track per-block uptodate, dirty state
+ * and I/O completions.
  */
-struct iomap_page {
+struct iomap_folio_state {
        atomic_t                read_bytes_pending;
        atomic_t                write_bytes_pending;
-       spinlock_t              uptodate_lock;
-       unsigned long           uptodate[];
+       spinlock_t              state_lock;
+
+       /*
+        * Each block has two bits in this bitmap:
+        * Bits [0..blocks_per_folio) hold the uptodate status.
+        * Bits [b_p_f...(2*b_p_f))   hold the dirty status.
+        */
+       unsigned long           state[];
 };
 
-static inline struct iomap_page *to_iomap_page(struct folio *folio)
+static struct bio_set iomap_ioend_bioset;
+
+static inline bool ifs_is_fully_uptodate(struct folio *folio,
+               struct iomap_folio_state *ifs)
 {
-       if (folio_test_private(folio))
-               return folio_get_private(folio);
-       return NULL;
+       struct inode *inode = folio->mapping->host;
+
+       return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
 }
 
-static struct bio_set iomap_ioend_bioset;
+static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
+               unsigned int block)
+{
+       return test_bit(block, ifs->state);
+}
+
+static void ifs_set_range_uptodate(struct folio *folio,
+               struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+       struct inode *inode = folio->mapping->host;
+       unsigned int first_blk = off >> inode->i_blkbits;
+       unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+       unsigned int nr_blks = last_blk - first_blk + 1;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ifs->state_lock, flags);
+       bitmap_set(ifs->state, first_blk, nr_blks);
+       if (ifs_is_fully_uptodate(folio, ifs))
+               folio_mark_uptodate(folio);
+       spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_uptodate(struct folio *folio, size_t off,
+               size_t len)
+{
+       struct iomap_folio_state *ifs = folio->private;
+
+       if (ifs)
+               ifs_set_range_uptodate(folio, ifs, off, len);
+       else
+               folio_mark_uptodate(folio);
+}
+
+static inline bool ifs_block_is_dirty(struct folio *folio,
+               struct iomap_folio_state *ifs, int block)
+{
+       struct inode *inode = folio->mapping->host;
+       unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+
+       return test_bit(block + blks_per_folio, ifs->state);
+}
+
+static void ifs_clear_range_dirty(struct folio *folio,
+               struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+       struct inode *inode = folio->mapping->host;
+       unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+       unsigned int first_blk = (off >> inode->i_blkbits);
+       unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+       unsigned int nr_blks = last_blk - first_blk + 1;
+       unsigned long flags;
 
-static struct iomap_page *
-iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags)
+       spin_lock_irqsave(&ifs->state_lock, flags);
+       bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
+       spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len)
 {
-       struct iomap_page *iop = to_iomap_page(folio);
+       struct iomap_folio_state *ifs = folio->private;
+
+       if (ifs)
+               ifs_clear_range_dirty(folio, ifs, off, len);
+}
+
+static void ifs_set_range_dirty(struct folio *folio,
+               struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+       struct inode *inode = folio->mapping->host;
+       unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+       unsigned int first_blk = (off >> inode->i_blkbits);
+       unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+       unsigned int nr_blks = last_blk - first_blk + 1;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ifs->state_lock, flags);
+       bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks);
+       spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+       struct iomap_folio_state *ifs = folio->private;
+
+       if (ifs)
+               ifs_set_range_dirty(folio, ifs, off, len);
+}
+
+static struct iomap_folio_state *ifs_alloc(struct inode *inode,
+               struct folio *folio, unsigned int flags)
+{
+       struct iomap_folio_state *ifs = folio->private;
        unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
        gfp_t gfp;
 
-       if (iop || nr_blocks <= 1)
-               return iop;
+       if (ifs || nr_blocks <= 1)
+               return ifs;
 
        if (flags & IOMAP_NOWAIT)
                gfp = GFP_NOWAIT;
        else
                gfp = GFP_NOFS | __GFP_NOFAIL;
 
-       iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
-                     gfp);
-       if (iop) {
-               spin_lock_init(&iop->uptodate_lock);
-               if (folio_test_uptodate(folio))
-                       bitmap_fill(iop->uptodate, nr_blocks);
-               folio_attach_private(folio, iop);
-       }
-       return iop;
+       /*
+        * ifs->state tracks two sets of state flags when the
+        * filesystem block size is smaller than the folio size.
+        * The first set tracks per-block uptodate state and the
+        * second tracks per-block dirty state.
+        */
+       ifs = kzalloc(struct_size(ifs, state,
+                     BITS_TO_LONGS(2 * nr_blocks)), gfp);
+       if (!ifs)
+               return ifs;
+
+       spin_lock_init(&ifs->state_lock);
+       if (folio_test_uptodate(folio))
+               bitmap_set(ifs->state, 0, nr_blocks);
+       if (folio_test_dirty(folio))
+               bitmap_set(ifs->state, nr_blocks, nr_blocks);
+       folio_attach_private(folio, ifs);
+
+       return ifs;
 }
 
-static void iomap_page_release(struct folio *folio)
+static void ifs_free(struct folio *folio)
 {
-       struct iomap_page *iop = folio_detach_private(folio);
-       struct inode *inode = folio->mapping->host;
-       unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+       struct iomap_folio_state *ifs = folio_detach_private(folio);
 
-       if (!iop)
+       if (!ifs)
                return;
-       WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
-       WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
-       WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
+       WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
+       WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
+       WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
                        folio_test_uptodate(folio));
-       kfree(iop);
+       kfree(ifs);
 }
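
The state[] bitmap above is deliberately flat: the first blocks_per_folio bits carry uptodate state and the next blocks_per_folio bits carry dirty state, so a block's dirty bit is found by offsetting the block number. A sketch of the index arithmetic (ifs_dirty_bit() is a hypothetical helper; the real code open-codes this in ifs_block_is_dirty() and friends):

	/* Hypothetical helper showing where a block's dirty bit lives. */
	static unsigned int ifs_dirty_bit(struct inode *inode,
					  struct folio *folio,
					  unsigned int block)
	{
		/* Dirty bits start right after the uptodate bits. */
		return block + i_blocks_per_folio(inode, folio);
	}
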
 
 /*
@@ -90,7 +194,7 @@ static void iomap_page_release(struct folio *folio)
 static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
                loff_t *pos, loff_t length, size_t *offp, size_t *lenp)
 {
-       struct iomap_page *iop = to_iomap_page(folio);
+       struct iomap_folio_state *ifs = folio->private;
        loff_t orig_pos = *pos;
        loff_t isize = i_size_read(inode);
        unsigned block_bits = inode->i_blkbits;
@@ -105,12 +209,12 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
         * per-block uptodate status and adjust the offset and length if needed
         * to avoid reading in already uptodate ranges.
         */
-       if (iop) {
+       if (ifs) {
                unsigned int i;
 
                /* move forward for each leading block marked uptodate */
                for (i = first; i <= last; i++) {
-                       if (!test_bit(i, iop->uptodate))
+                       if (!ifs_block_is_uptodate(ifs, i))
                                break;
                        *pos += block_size;
                        poff += block_size;
@@ -120,7 +224,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
 
                /* truncate len if we find any trailing uptodate block(s) */
                for ( ; i <= last; i++) {
-                       if (test_bit(i, iop->uptodate)) {
+                       if (ifs_block_is_uptodate(ifs, i)) {
                                plen -= (last - i + 1) * block_size;
                                last = i - 1;
                                break;
@@ -144,43 +248,19 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
        *lenp = plen;
 }
 
-static void iomap_iop_set_range_uptodate(struct folio *folio,
-               struct iomap_page *iop, size_t off, size_t len)
-{
-       struct inode *inode = folio->mapping->host;
-       unsigned first = off >> inode->i_blkbits;
-       unsigned last = (off + len - 1) >> inode->i_blkbits;
-       unsigned long flags;
-
-       spin_lock_irqsave(&iop->uptodate_lock, flags);
-       bitmap_set(iop->uptodate, first, last - first + 1);
-       if (bitmap_full(iop->uptodate, i_blocks_per_folio(inode, folio)))
-               folio_mark_uptodate(folio);
-       spin_unlock_irqrestore(&iop->uptodate_lock, flags);
-}
-
-static void iomap_set_range_uptodate(struct folio *folio,
-               struct iomap_page *iop, size_t off, size_t len)
-{
-       if (iop)
-               iomap_iop_set_range_uptodate(folio, iop, off, len);
-       else
-               folio_mark_uptodate(folio);
-}
-
 static void iomap_finish_folio_read(struct folio *folio, size_t offset,
                size_t len, int error)
 {
-       struct iomap_page *iop = to_iomap_page(folio);
+       struct iomap_folio_state *ifs = folio->private;
 
        if (unlikely(error)) {
                folio_clear_uptodate(folio);
                folio_set_error(folio);
        } else {
-               iomap_set_range_uptodate(folio, iop, offset, len);
+               iomap_set_range_uptodate(folio, offset, len);
        }
 
-       if (!iop || atomic_sub_and_test(len, &iop->read_bytes_pending))
+       if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
                folio_unlock(folio);
 }
 
@@ -213,7 +293,6 @@ struct iomap_readpage_ctx {
 static int iomap_read_inline_data(const struct iomap_iter *iter,
                struct folio *folio)
 {
-       struct iomap_page *iop;
        const struct iomap *iomap = iomap_iter_srcmap(iter);
        size_t size = i_size_read(iter->inode) - iomap->offset;
        size_t poff = offset_in_page(iomap->offset);
@@ -231,15 +310,13 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
        if (WARN_ON_ONCE(size > iomap->length))
                return -EIO;
        if (offset > 0)
-               iop = iomap_page_create(iter->inode, folio, iter->flags);
-       else
-               iop = to_iomap_page(folio);
+               ifs_alloc(iter->inode, folio, iter->flags);
 
        addr = kmap_local_folio(folio, offset);
        memcpy(addr, iomap->inline_data, size);
        memset(addr + size, 0, PAGE_SIZE - poff - size);
        kunmap_local(addr);
-       iomap_set_range_uptodate(folio, iop, offset, PAGE_SIZE - poff);
+       iomap_set_range_uptodate(folio, offset, PAGE_SIZE - poff);
        return 0;
 }
 
@@ -260,7 +337,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
        loff_t pos = iter->pos + offset;
        loff_t length = iomap_length(iter) - offset;
        struct folio *folio = ctx->cur_folio;
-       struct iomap_page *iop;
+       struct iomap_folio_state *ifs;
        loff_t orig_pos = pos;
        size_t poff, plen;
        sector_t sector;
@@ -269,20 +346,20 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
                return iomap_read_inline_data(iter, folio);
 
        /* zero post-eof blocks as the page may be mapped */
-       iop = iomap_page_create(iter->inode, folio, iter->flags);
+       ifs = ifs_alloc(iter->inode, folio, iter->flags);
        iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
        if (plen == 0)
                goto done;
 
        if (iomap_block_needs_zeroing(iter, pos)) {
                folio_zero_range(folio, poff, plen);
-               iomap_set_range_uptodate(folio, iop, poff, plen);
+               iomap_set_range_uptodate(folio, poff, plen);
                goto done;
        }
 
        ctx->cur_folio_in_bio = true;
-       if (iop)
-               atomic_add(plen, &iop->read_bytes_pending);
+       if (ifs)
+               atomic_add(plen, &ifs->read_bytes_pending);
 
        sector = iomap_sector(iomap, pos);
        if (!ctx->bio ||
@@ -436,11 +513,11 @@ EXPORT_SYMBOL_GPL(iomap_readahead);
  */
 bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
 {
-       struct iomap_page *iop = to_iomap_page(folio);
+       struct iomap_folio_state *ifs = folio->private;
        struct inode *inode = folio->mapping->host;
        unsigned first, last, i;
 
-       if (!iop)
+       if (!ifs)
                return false;
 
        /* Caller's range may extend past the end of this folio */
@@ -451,7 +528,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
        last = (from + count - 1) >> inode->i_blkbits;
 
        for (i = first; i <= last; i++)
-               if (!test_bit(i, iop->uptodate))
+               if (!ifs_block_is_uptodate(ifs, i))
                        return false;
        return true;
 }
@@ -461,16 +538,18 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
  * iomap_get_folio - get a folio reference for writing
  * @iter: iteration structure
  * @pos: start offset of write
+ * @len: Suggested size of folio to create.
  *
  * Returns a locked reference to the folio at @pos, or an error pointer if the
  * folio could not be obtained.
  */
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
 {
-       unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS;
+       fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
 
        if (iter->flags & IOMAP_NOWAIT)
                fgp |= FGP_NOWAIT;
+       fgp |= fgf_set_order(len);
 
        return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
                        fgp, mapping_gfp_mask(iter->inode->i_mapping));
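
iomap_get_folio() now takes the write length and folds it into the FGP flags with fgf_set_order(), hinting to __filemap_get_folio() that a large folio covering the whole write would be welcome. A sketch of that pattern in isolation (get_write_folio() is a hypothetical wrapper):

	#include <linux/pagemap.h>

	/* Hypothetical wrapper showing the large-folio allocation hint. */
	static struct folio *get_write_folio(struct address_space *mapping,
					     loff_t pos, size_t len)
	{
		/* Encode a suggested folio order for this write into fgp. */
		fgf_t fgp = FGP_WRITEBEGIN | fgf_set_order(len);

		return __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
					   mapping_gfp_mask(mapping));
	}
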
@@ -483,14 +562,13 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
                        folio_size(folio));
 
        /*
-        * mm accommodates an old ext3 case where clean folios might
-        * not have had the dirty bit cleared.  Thus, it can send actual
-        * dirty folios to ->release_folio() via shrink_active_list();
-        * skip those here.
+        * If the folio is dirty, we refuse to release our metadata because
+        * it may be partially dirty.  Once we track per-block dirty state,
+        * we can release the metadata if every block is dirty.
         */
-       if (folio_test_dirty(folio) || folio_test_writeback(folio))
+       if (folio_test_dirty(folio))
                return false;
-       iomap_page_release(folio);
+       ifs_free(folio);
        return true;
 }
 EXPORT_SYMBOL_GPL(iomap_release_folio);
@@ -507,16 +585,22 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
        if (offset == 0 && len == folio_size(folio)) {
                WARN_ON_ONCE(folio_test_writeback(folio));
                folio_cancel_dirty(folio);
-               iomap_page_release(folio);
-       } else if (folio_test_large(folio)) {
-               /* Must release the iop so the page can be split */
-               WARN_ON_ONCE(!folio_test_uptodate(folio) &&
-                            folio_test_dirty(folio));
-               iomap_page_release(folio);
+               ifs_free(folio);
        }
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
 
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+       struct inode *inode = mapping->host;
+       size_t len = folio_size(folio);
+
+       ifs_alloc(inode, folio, 0);
+       iomap_set_range_dirty(folio, 0, len);
+       return filemap_dirty_folio(mapping, folio);
+}
+EXPORT_SYMBOL_GPL(iomap_dirty_folio);
+
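
For the per-block dirty bitmap to stay coherent, paths that dirty a folio (including mmap writes) should pass through the new hook, so an iomap-based filesystem routes ->dirty_folio to iomap_dirty_folio() in its address_space_operations. An illustrative, deliberately incomplete aops table, not any specific filesystem's:

	/* Illustrative aops wiring; read/write methods are elided. */
	static const struct address_space_operations fs_iomap_aops = {
		.dirty_folio		= iomap_dirty_folio,
		.release_folio		= iomap_release_folio,
		.invalidate_folio	= iomap_invalidate_folio,
		.is_partially_uptodate	= iomap_is_partially_uptodate,
	};
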
 static void
 iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
 {
@@ -547,7 +631,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
                size_t len, struct folio *folio)
 {
        const struct iomap *srcmap = iomap_iter_srcmap(iter);
-       struct iomap_page *iop;
+       struct iomap_folio_state *ifs;
        loff_t block_size = i_blocksize(iter->inode);
        loff_t block_start = round_down(pos, block_size);
        loff_t block_end = round_up(pos + len, block_size);
@@ -555,14 +639,23 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
        size_t from = offset_in_folio(folio, pos), to = from + len;
        size_t poff, plen;
 
-       if (folio_test_uptodate(folio))
+       /*
+        * If the write completely overlaps the current folio, then the
+        * entire folio will be dirtied so there is no need for
+        * per-block state tracking structures to be attached to this folio.
+        */
+       if (pos <= folio_pos(folio) &&
+           pos + len >= folio_pos(folio) + folio_size(folio))
                return 0;
-       folio_clear_error(folio);
 
-       iop = iomap_page_create(iter->inode, folio, iter->flags);
-       if ((iter->flags & IOMAP_NOWAIT) && !iop && nr_blocks > 1)
+       ifs = ifs_alloc(iter->inode, folio, iter->flags);
+       if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
                return -EAGAIN;
 
+       if (folio_test_uptodate(folio))
+               return 0;
+       folio_clear_error(folio);
+
        do {
                iomap_adjust_read_range(iter->inode, folio, &block_start,
                                block_end - block_start, &poff, &plen);
@@ -589,7 +682,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
                        if (status)
                                return status;
                }
-               iomap_set_range_uptodate(folio, iop, poff, plen);
+               iomap_set_range_uptodate(folio, poff, plen);
        } while ((block_start += plen) < block_end);
 
        return 0;
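
The early return added to __iomap_write_begin() above encodes a simple predicate: a write that spans the entire folio will dirty every block, so the per-block state adds nothing and its allocation can be skipped. The test, spelled out as a standalone sketch (write_covers_folio() is a hypothetical name):

	/* Hypothetical predicate matching the fast-path test above. */
	static bool write_covers_folio(struct folio *folio, loff_t pos,
				       size_t len)
	{
		return pos <= folio_pos(folio) &&
		       pos + len >= folio_pos(folio) + folio_size(folio);
	}
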
@@ -603,7 +696,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
        if (folio_ops && folio_ops->get_folio)
                return folio_ops->get_folio(iter, pos, len);
        else
-               return iomap_get_folio(iter, pos);
+               return iomap_get_folio(iter, pos, len);
 }
 
 static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
@@ -696,7 +789,6 @@ out_unlock:
 static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
                size_t copied, struct folio *folio)
 {
-       struct iomap_page *iop = to_iomap_page(folio);
        flush_dcache_folio(folio);
 
        /*
@@ -712,7 +804,8 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
         */
        if (unlikely(copied < len && !folio_test_uptodate(folio)))
                return 0;
-       iomap_set_range_uptodate(folio, iop, offset_in_folio(folio, pos), len);
+       iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
+       iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
        filemap_dirty_folio(inode->i_mapping, folio);
        return copied;
 }
@@ -773,6 +866,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
        loff_t length = iomap_length(iter);
+       size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
        loff_t pos = iter->pos;
        ssize_t written = 0;
        long status = 0;
@@ -781,15 +875,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 
        do {
                struct folio *folio;
-               struct page *page;
-               unsigned long offset;   /* Offset into pagecache page */
-               unsigned long bytes;    /* Bytes to write to page */
+               size_t offset;          /* Offset into folio */
+               size_t bytes;           /* Bytes to write to folio */
                size_t copied;          /* Bytes copied from user */
 
-               offset = offset_in_page(pos);
-               bytes = min_t(unsigned long, PAGE_SIZE - offset,
-                                               iov_iter_count(i));
-again:
+               offset = pos & (chunk - 1);
+               bytes = min(chunk - offset, iov_iter_count(i));
                status = balance_dirty_pages_ratelimited_flags(mapping,
                                                               bdp_flags);
                if (unlikely(status))
@@ -819,12 +910,14 @@ again:
                if (iter->iomap.flags & IOMAP_F_STALE)
                        break;
 
-               page = folio_file_page(folio, pos >> PAGE_SHIFT);
-               if (mapping_writably_mapped(mapping))
-                       flush_dcache_page(page);
+               offset = offset_in_folio(folio, pos);
+               if (bytes > folio_size(folio) - offset)
+                       bytes = folio_size(folio) - offset;
 
-               copied = copy_page_from_iter_atomic(page, offset, bytes, i);
+               if (mapping_writably_mapped(mapping))
+                       flush_dcache_folio(folio);
 
+               copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
                status = iomap_write_end(iter, pos, bytes, copied, folio);
 
                if (unlikely(copied != status))
@@ -840,11 +933,13 @@ again:
                         */
                        if (copied)
                                bytes = copied;
-                       goto again;
+                       if (chunk > PAGE_SIZE)
+                               chunk /= 2;
+               } else {
+                       pos += status;
+                       written += status;
+                       length -= status;
                }
-               pos += status;
-               written += status;
-               length -= status;
        } while (iov_iter_count(i) && length);
 
        if (status == -EAGAIN) {
@@ -872,14 +967,84 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
        while ((ret = iomap_iter(&iter, ops)) > 0)
                iter.processed = iomap_write_iter(&iter, i);
 
-       if (unlikely(ret < 0))
+       if (unlikely(iter.pos == iocb->ki_pos))
                return ret;
        ret = iter.pos - iocb->ki_pos;
-       iocb->ki_pos += ret;
+       iocb->ki_pos = iter.pos;
        return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
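
The rewritten iomap_write_iter above starts with a MAX_PAGECACHE_ORDER-sized chunk and halves it whenever the copy from userspace comes up short, rather than retrying the same PAGE_SIZE window. A compilable sketch of that backoff loop, with a stubbed copy that faults once (all names here are illustrative, not the kernel's):

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MAX_CHUNK (PAGE_SIZE << 4)  /* stand-in for PAGE_SIZE << MAX_PAGECACHE_ORDER */

/* Pretend copy: the first call faults entirely, later calls succeed. */
static size_t copy_chunk(size_t bytes)
{
        static int calls;
        return (calls++ == 0) ? 0 : bytes;
}

int main(void)
{
        size_t chunk = MAX_CHUNK, pos = 0, remaining = 3 * PAGE_SIZE;

        while (remaining) {
                size_t offset = pos & (chunk - 1);
                size_t bytes = remaining < chunk - offset ?
                               remaining : chunk - offset;
                size_t copied = copy_chunk(bytes);

                if (copied == 0) {
                        /* Nothing copied: the user buffer faulted. Shrink
                         * the chunk and retry so the range we must fault
                         * in gets smaller each round. */
                        if (chunk > PAGE_SIZE)
                                chunk /= 2;
                        printf("short copy, chunk now %zu\n", chunk);
                        continue;
                }
                pos += copied;
                remaining -= copied;
                printf("copied %zu at %zu\n", copied, pos - copied);
        }
        return 0;
}
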
 
+static int iomap_write_delalloc_ifs_punch(struct inode *inode,
+               struct folio *folio, loff_t start_byte, loff_t end_byte,
+               iomap_punch_t punch)
+{
+       unsigned int first_blk, last_blk, i;
+       loff_t last_byte;
+       u8 blkbits = inode->i_blkbits;
+       struct iomap_folio_state *ifs;
+       int ret = 0;
+
+       /*
+        * When we have per-block dirty tracking, there can be
+        * blocks within a folio which are marked uptodate
+        * but not dirty. In that case it is necessary to punch
+        * out such blocks to avoid leaking any delalloc blocks.
+        */
+       ifs = folio->private;
+       if (!ifs)
+               return ret;
+
+       last_byte = min_t(loff_t, end_byte - 1,
+                       folio_pos(folio) + folio_size(folio) - 1);
+       first_blk = offset_in_folio(folio, start_byte) >> blkbits;
+       last_blk = offset_in_folio(folio, last_byte) >> blkbits;
+       for (i = first_blk; i <= last_blk; i++) {
+               if (!ifs_block_is_dirty(folio, ifs, i)) {
+                       ret = punch(inode, folio_pos(folio) + (i << blkbits),
+                                   1 << blkbits);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return ret;
+}
+
+
+static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
+               loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
+               iomap_punch_t punch)
+{
+       int ret = 0;
+
+       if (!folio_test_dirty(folio))
+               return ret;
+
+       /* if dirty, punch up to offset */
+       if (start_byte > *punch_start_byte) {
+               ret = punch(inode, *punch_start_byte,
+                               start_byte - *punch_start_byte);
+               if (ret)
+                       return ret;
+       }
+
+       /* Punch non-dirty blocks within folio */
+       ret = iomap_write_delalloc_ifs_punch(inode, folio, start_byte,
+                       end_byte, punch);
+       if (ret)
+               return ret;
+
+       /*
+        * Make sure the next punch start is correctly bound to
+        * the end of this data range, not the end of the folio.
+        */
+       *punch_start_byte = min_t(loff_t, end_byte,
+                               folio_pos(folio) + folio_size(folio));
+
+       return ret;
+}
+
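With per-block dirty tracking a folio can contain blocks that are uptodate but were never dirtied, and iomap_write_delalloc_ifs_punch punches those out block by block. A self-contained model of that walk, using a plain array in place of the ifs bitmap (names invented for the sketch):

#include <stdio.h>

#define BLKSZ 4096L
#define NBLOCKS 8

/* Toy punch callback: in the kernel this is the filesystem's
 * delalloc-extent punch routine. */
static int punch(long off, long len)
{
        printf("punch [%ld, %ld)\n", off, off + len);
        return 0;
}

/* Punch every non-dirty block in the folio-relative block range
 * [first, last]; folio_pos is the folio's byte offset in the file. */
static int punch_clean_blocks(const int *dirty, long folio_pos,
                              int first, int last)
{
        for (int i = first; i <= last; i++) {
                if (!dirty[i]) {
                        int ret = punch(folio_pos + (long)i * BLKSZ, BLKSZ);
                        if (ret)
                                return ret;
                }
        }
        return 0;
}

int main(void)
{
        /* blocks 2 and 5 were written (dirty); the rest were only read */
        int dirty[NBLOCKS] = { 0, 0, 1, 0, 0, 1, 0, 0 };

        return punch_clean_blocks(dirty, 0, 0, NBLOCKS - 1);
}
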
 /*
  * Scan the data range passed to us for dirty page cache folios. If we find a
  * dirty folio, punch out the preceding range and update the offset from which
@@ -899,10 +1064,11 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
  */
 static int iomap_write_delalloc_scan(struct inode *inode,
                loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
-               int (*punch)(struct inode *inode, loff_t offset, loff_t length))
+               iomap_punch_t punch)
 {
        while (start_byte < end_byte) {
                struct folio    *folio;
+               int ret;
 
                /* grab locked page */
                folio = filemap_lock_folio(inode->i_mapping,
@@ -913,26 +1079,12 @@ static int iomap_write_delalloc_scan(struct inode *inode,
                        continue;
                }
 
-               /* if dirty, punch up to offset */
-               if (folio_test_dirty(folio)) {
-                       if (start_byte > *punch_start_byte) {
-                               int     error;
-
-                               error = punch(inode, *punch_start_byte,
-                                               start_byte - *punch_start_byte);
-                               if (error) {
-                                       folio_unlock(folio);
-                                       folio_put(folio);
-                                       return error;
-                               }
-                       }
-
-                       /*
-                        * Make sure the next punch start is correctly bound to
-                        * the end of this data range, not the end of the folio.
-                        */
-                       *punch_start_byte = min_t(loff_t, end_byte,
-                                       folio_next_index(folio) << PAGE_SHIFT);
+               ret = iomap_write_delalloc_punch(inode, folio, punch_start_byte,
+                                                start_byte, end_byte, punch);
+               if (ret) {
+                       folio_unlock(folio);
+                       folio_put(folio);
+                       return ret;
                }
 
                /* move offset to start of next folio in range */
@@ -977,8 +1129,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
  * the code to subtle off-by-one bugs....
  */
 static int iomap_write_delalloc_release(struct inode *inode,
-               loff_t start_byte, loff_t end_byte,
-               int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+               loff_t start_byte, loff_t end_byte, iomap_punch_t punch)
 {
        loff_t punch_start_byte = start_byte;
        loff_t scan_end_byte = min(i_size_read(inode), end_byte);
@@ -1071,8 +1222,7 @@ out_unlock:
  */
 int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
                struct iomap *iomap, loff_t pos, loff_t length,
-               ssize_t written,
-               int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+               ssize_t written, iomap_punch_t punch)
 {
        loff_t                  start_byte;
        loff_t                  end_byte;
@@ -1293,17 +1443,17 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
 static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
                size_t len, int error)
 {
-       struct iomap_page *iop = to_iomap_page(folio);
+       struct iomap_folio_state *ifs = folio->private;
 
        if (error) {
                folio_set_error(folio);
                mapping_set_error(inode->i_mapping, error);
        }
 
-       WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !iop);
-       WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
+       WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
+       WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0);
 
-       if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
+       if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending))
                folio_end_writeback(folio);
 }
 
@@ -1570,7 +1720,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
  */
 static void
 iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
-               struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
+               struct iomap_folio_state *ifs, struct iomap_writepage_ctx *wpc,
                struct writeback_control *wbc, struct list_head *iolist)
 {
        sector_t sector = iomap_sector(&wpc->iomap, pos);
@@ -1588,8 +1738,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
                bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff);
        }
 
-       if (iop)
-               atomic_add(len, &iop->write_bytes_pending);
+       if (ifs)
+               atomic_add(len, &ifs->write_bytes_pending);
        wpc->ioend->io_size += len;
        wbc_account_cgroup_owner(wbc, &folio->page, len);
 }
@@ -1615,7 +1765,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
                struct writeback_control *wbc, struct inode *inode,
                struct folio *folio, u64 end_pos)
 {
-       struct iomap_page *iop = iomap_page_create(inode, folio, 0);
+       struct iomap_folio_state *ifs = folio->private;
        struct iomap_ioend *ioend, *next;
        unsigned len = i_blocksize(inode);
        unsigned nblocks = i_blocks_per_folio(inode, folio);
@@ -1623,7 +1773,14 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
        int error = 0, count = 0, i;
        LIST_HEAD(submit_list);
 
-       WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
+       WARN_ON_ONCE(end_pos <= pos);
+
+       if (!ifs && nblocks > 1) {
+               ifs = ifs_alloc(inode, folio, 0);
+               iomap_set_range_dirty(folio, 0, end_pos - pos);
+       }
+
+       WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
 
        /*
         * Walk through the folio to find areas to write back. If we
@@ -1631,7 +1788,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
         * invalid, grab a new one.
         */
        for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-               if (iop && !test_bit(i, iop->uptodate))
+               if (ifs && !ifs_block_is_dirty(folio, ifs, i))
                        continue;
 
                error = wpc->ops->map_blocks(wpc, inode, pos);
@@ -1642,7 +1799,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
                        continue;
                if (wpc->iomap.type == IOMAP_HOLE)
                        continue;
-               iomap_add_to_ioend(inode, pos, folio, iop, wpc, wbc,
+               iomap_add_to_ioend(inode, pos, folio, ifs, wpc, wbc,
                                 &submit_list);
                count++;
        }
@@ -1675,6 +1832,12 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
                }
        }
 
+       /*
+        * We can have dirty bits set past the end of file via the
+        * page_mkwrite path while mapping the last partial folio. Hence
+        * it's better to clear all the dirty bits in the folio here.
+        */
+       iomap_clear_range_dirty(folio, 0, folio_size(folio));
        folio_start_writeback(folio);
        folio_unlock(folio);
 
index ea3b868..bcd3f8c 100644 (file)
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
-#define IOMAP_DIO_WRITE_FUA    (1 << 28)
-#define IOMAP_DIO_NEED_SYNC    (1 << 29)
-#define IOMAP_DIO_WRITE                (1 << 30)
-#define IOMAP_DIO_DIRTY                (1 << 31)
+#define IOMAP_DIO_CALLER_COMP  (1U << 26)
+#define IOMAP_DIO_INLINE_COMP  (1U << 27)
+#define IOMAP_DIO_WRITE_THROUGH        (1U << 28)
+#define IOMAP_DIO_NEED_SYNC    (1U << 29)
+#define IOMAP_DIO_WRITE                (1U << 30)
+#define IOMAP_DIO_DIRTY                (1U << 31)
 
 struct iomap_dio {
        struct kiocb            *iocb;
@@ -41,7 +43,6 @@ struct iomap_dio {
                struct {
                        struct iov_iter         *iter;
                        struct task_struct      *waiter;
-                       struct bio              *poll_bio;
                } submit;
 
                /* used for aio completion: */
@@ -63,12 +64,14 @@ static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
 static void iomap_dio_submit_bio(const struct iomap_iter *iter,
                struct iomap_dio *dio, struct bio *bio, loff_t pos)
 {
+       struct kiocb *iocb = dio->iocb;
+
        atomic_inc(&dio->ref);
 
        /* Sync dio can't be polled reliably */
-       if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
-               bio_set_polled(bio, dio->iocb);
-               dio->submit.poll_bio = bio;
+       if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
+               bio_set_polled(bio, iocb);
+               WRITE_ONCE(iocb->private, bio);
        }
 
        if (dio->dops && dio->dops->submit_io)
@@ -130,6 +133,11 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
 }
 EXPORT_SYMBOL_GPL(iomap_dio_complete);
 
+static ssize_t iomap_dio_deferred_complete(void *data)
+{
+       return iomap_dio_complete(data);
+}
+
 static void iomap_dio_complete_work(struct work_struct *work)
 {
        struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
@@ -152,27 +160,69 @@ void iomap_dio_bio_end_io(struct bio *bio)
 {
        struct iomap_dio *dio = bio->bi_private;
        bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+       struct kiocb *iocb = dio->iocb;
 
        if (bio->bi_status)
                iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
+       if (!atomic_dec_and_test(&dio->ref))
+               goto release_bio;
 
-       if (atomic_dec_and_test(&dio->ref)) {
-               if (dio->wait_for_completion) {
-                       struct task_struct *waiter = dio->submit.waiter;
-                       WRITE_ONCE(dio->submit.waiter, NULL);
-                       blk_wake_io_task(waiter);
-               } else if (dio->flags & IOMAP_DIO_WRITE) {
-                       struct inode *inode = file_inode(dio->iocb->ki_filp);
-
-                       WRITE_ONCE(dio->iocb->private, NULL);
-                       INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
-                       queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
-               } else {
-                       WRITE_ONCE(dio->iocb->private, NULL);
-                       iomap_dio_complete_work(&dio->aio.work);
-               }
+       /*
+        * Synchronous dio: the submitting task itself will handle any
+        * completion work needed after IO. All we need to do is wake it.
+        */
+       if (dio->wait_for_completion) {
+               struct task_struct *waiter = dio->submit.waiter;
+
+               WRITE_ONCE(dio->submit.waiter, NULL);
+               blk_wake_io_task(waiter);
+               goto release_bio;
+       }
+
+       /*
+        * If flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline.
+        */
+       if (dio->flags & IOMAP_DIO_INLINE_COMP) {
+               WRITE_ONCE(iocb->private, NULL);
+               iomap_dio_complete_work(&dio->aio.work);
+               goto release_bio;
+       }
+
+       /*
+        * If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
+        * our completion that way to avoid an async punt to a workqueue.
+        */
+       if (dio->flags & IOMAP_DIO_CALLER_COMP) {
+               /* only polled IO cares about iocb->private being cleared */
+               iocb->private = dio;
+               iocb->dio_complete = iomap_dio_deferred_complete;
+
+               /*
+                * Invoke ->ki_complete() directly. We've assigned our
+                * dio_complete callback handler, and since the issuer set
+                * IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
+                * notice ->dio_complete being set and will defer calling that
+                * handler until it can be done from a safe task context.
+                *
+                * Note that the 'res' being passed in here is not important
+                * for this case. The actual completion value of the request
+                * will be retrieved from dio_complete when that is run by the
+                * issuer.
+                */
+               iocb->ki_complete(iocb, 0);
+               goto release_bio;
        }
 
+       /*
+        * Async DIO completion that requires filesystem level completion work
+        * gets punted to a work queue to complete as the operation may require
+        * more IO to be issued to finalise filesystem metadata changes or
+        * guarantee data integrity.
+        */
+       INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+       queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
+                       &dio->aio.work);
+release_bio:
        if (should_dirty) {
                bio_check_pages_dirty(bio);
        } else {
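
The restructured end_io above is effectively a four-way dispatch: wake a synchronous waiter, complete inline, hand completion back to the issuer via ->dio_complete, or fall back to the workqueue. A compilable sketch of that decision ladder; the flag names mirror the patch, the surrounding types are fabricated:

#include <stdio.h>

#define DIO_INLINE_COMP (1U << 0)
#define DIO_CALLER_COMP (1U << 1)

struct toy_dio {
        unsigned flags;
        int wait_for_completion;
};

static const char *completion_path(const struct toy_dio *dio)
{
        if (dio->wait_for_completion)
                return "wake sync waiter";
        if (dio->flags & DIO_INLINE_COMP)
                return "complete inline (reads, no further IO)";
        if (dio->flags & DIO_CALLER_COMP)
                return "defer to issuer via ->dio_complete";
        return "punt to workqueue (needs fs completion work)";
}

int main(void)
{
        struct toy_dio cases[] = {
                { .wait_for_completion = 1 },
                { .flags = DIO_INLINE_COMP },
                { .flags = DIO_CALLER_COMP },
                { 0 },
        };

        for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
                printf("case %u: %s\n", i, completion_path(&cases[i]));
        return 0;
}

Note the ordering matters: the sync-waiter check comes first because a blocked task must be woken regardless of what the flags say.
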
@@ -203,7 +253,7 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
 /*
  * Figure out the bio's operation flags from the dio request, the
  * mapping, and whether or not we want FUA.  Note that we can end up
- * clearing the WRITE_FUA flag in the dio request.
+ * clearing the WRITE_THROUGH flag in the dio request.
  */
 static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
                const struct iomap *iomap, bool use_fua)
@@ -217,7 +267,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
        if (use_fua)
                opflags |= REQ_FUA;
        else
-               dio->flags &= ~IOMAP_DIO_WRITE_FUA;
+               dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
 
        return opflags;
 }
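
The old FUA-only test widens here into a write-through test: the completion-time cache flush can be skipped when the device honours FUA, or when it has no volatile write cache at all. A stand-alone sketch of that predicate, with toy stand-ins for bdev_fua()/bdev_write_cache():

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the bdev_fua() / bdev_write_cache() device properties. */
struct toy_bdev {
        bool fua;
        bool write_cache;
};

/* A datasync write can complete "write-through" (no flush on completion)
 * if FUA is honoured, or if there is no volatile cache for the data to
 * sit in. */
static bool can_write_through(const struct toy_bdev *b)
{
        return b->fua || !b->write_cache;
}

int main(void)
{
        struct toy_bdev devs[] = {
                { .fua = true,  .write_cache = true  },
                { .fua = false, .write_cache = false },
                { .fua = false, .write_cache = true  },
        };

        for (unsigned i = 0; i < 3; i++)
                printf("dev %u: %s\n", i, can_write_through(&devs[i]) ?
                       "write-through, skip flush" : "needs flush on fsync");
        return 0;
}
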
@@ -257,12 +307,19 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
                 * Use a FUA write if we need datasync semantics, this is a pure
                 * data IO that doesn't require any metadata updates (including
                 * after IO completion such as unwritten extent conversion) and
-                * the underlying device supports FUA. This allows us to avoid
-                * cache flushes on IO completion.
+                * the underlying device either supports FUA or doesn't have
+                * a volatile write cache. This allows us to avoid cache flushes
+                * on IO completion. If we can't use writethrough and need to
+                * sync, disable in-task completions as dio completion will
+                * need to call generic_write_sync() which will do a blocking
+                * fsync / cache flush call.
                 */
                if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
-                   (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
+                   (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
+                   (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
                        use_fua = true;
+               else if (dio->flags & IOMAP_DIO_NEED_SYNC)
+                       dio->flags &= ~IOMAP_DIO_CALLER_COMP;
        }
 
        /*
@@ -277,10 +334,23 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
                goto out;
 
        /*
-        * We can only poll for single bio I/Os.
+        * We can only do deferred completion for pure overwrites that
+        * don't require additional IO at completion. This rules out
+        * writes that need zeroing or extent conversion, that extend
+        * the file size, or that issue journal IO or cache flushes
+        * during completion processing.
         */
        if (need_zeroout ||
+           ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
            ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
+               dio->flags &= ~IOMAP_DIO_CALLER_COMP;
+
+       /*
+        * The rules for polled IO completions follow the same guidelines
+        * as the ones we set for inline and deferred completions. If none
+        * of those are available for this IO, clear the polled flag.
+        */
+       if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP)))
                dio->iocb->ki_flags &= ~IOCB_HIPRI;
 
        if (need_zeroout) {
@@ -505,12 +575,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
        dio->submit.iter = iter;
        dio->submit.waiter = current;
-       dio->submit.poll_bio = NULL;
 
        if (iocb->ki_flags & IOCB_NOWAIT)
                iomi.flags |= IOMAP_NOWAIT;
 
        if (iov_iter_rw(iter) == READ) {
+               /* reads can always complete inline */
+               dio->flags |= IOMAP_DIO_INLINE_COMP;
+
                if (iomi.pos >= dio->i_size)
                        goto out_free_dio;
 
@@ -524,6 +596,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                iomi.flags |= IOMAP_WRITE;
                dio->flags |= IOMAP_DIO_WRITE;
 
+               /*
+                * Flag as supporting deferred completions, if the issuer
+                * groks it. This can avoid a workqueue punt for writes.
+                * We may later clear this flag if we need to do other IO
+                * as part of this IO completion.
+                */
+               if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
+                       dio->flags |= IOMAP_DIO_CALLER_COMP;
+
                if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
                        ret = -EAGAIN;
                        if (iomi.pos >= dio->i_size ||
@@ -537,13 +618,16 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                        dio->flags |= IOMAP_DIO_NEED_SYNC;
 
                       /*
-                       * For datasync only writes, we optimistically try
-                       * using FUA for this IO.  Any non-FUA write that
-                       * occurs will clear this flag, hence we know before
-                       * completion whether a cache flush is necessary.
+                       * For datasync only writes, we optimistically try using
+                       * WRITE_THROUGH for this IO. This flag requires either
+                       * FUA writes through the device's write cache, or a
+                       * normal write to a device without a volatile write
+                * cache. For the former, any non-FUA write that occurs
+                       * will clear this flag, hence we know before completion
+                       * whether a cache flush is necessary.
                        */
                        if (!(iocb->ki_flags & IOCB_SYNC))
-                               dio->flags |= IOMAP_DIO_WRITE_FUA;
+                               dio->flags |= IOMAP_DIO_WRITE_THROUGH;
                }
 
                /*
@@ -605,14 +689,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                iomap_dio_set_error(dio, ret);
 
        /*
-        * If all the writes we issued were FUA, we don't need to flush the
-        * cache on IO completion. Clear the sync flag for this case.
+        * If all the writes we issued were already written through to the
+        * media, we don't need to flush the cache on IO completion. Clear the
+        * sync flag for this case.
         */
-       if (dio->flags & IOMAP_DIO_WRITE_FUA)
+       if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
                dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
-       WRITE_ONCE(iocb->private, dio->submit.poll_bio);
-
        /*
         * We are about to drop our additional submission reference, which
         * might be the last reference to the dio.  There are three different
index df9d705..2ee2128 100644 (file)
@@ -1422,13 +1422,8 @@ static int isofs_read_inode(struct inode *inode, int relocated)
                        inode->i_ino, de->flags[-high_sierra]);
        }
 #endif
-
-       inode->i_mtime.tv_sec =
-       inode->i_atime.tv_sec =
-       inode->i_ctime.tv_sec = iso_date(de->date, high_sierra);
-       inode->i_mtime.tv_nsec =
-       inode->i_atime.tv_nsec =
-       inode->i_ctime.tv_nsec = 0;
+       inode->i_mtime = inode->i_atime =
+               inode_set_ctime(inode, iso_date(de->date, high_sierra), 0);
 
        ei->i_first_extent = (isonum_733(de->extent) +
                        isonum_711(de->ext_attr_length));
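
This isofs hunk, and the jffs2/jfs hunks further down, all apply the same mechanical conversion: direct stores to i_ctime become inode_set_ctime()-style calls whose return value is chained into the mtime/atime assignments. A toy model of why the setter returning the timespec makes the one-liner work (struct and helper reimplemented here for illustration, not the kernel's):

#include <stdio.h>
#include <time.h>

struct toy_inode {
        struct timespec i_atime, i_mtime, i_ctime;
};

/* Mirrors the kernel helper's shape: set ctime and return the value so
 * callers can chain it into mtime/atime assignments. */
static struct timespec inode_set_ctime(struct toy_inode *inode,
                                       time_t sec, long nsec)
{
        inode->i_ctime.tv_sec = sec;
        inode->i_ctime.tv_nsec = nsec;
        return inode->i_ctime;
}

int main(void)
{
        struct toy_inode inode;

        /* The old style needed six assignments; the chained form is one. */
        inode.i_mtime = inode.i_atime = inode_set_ctime(&inode, 1693333333, 0);

        printf("ctime=%lld mtime=%lld atime=%lld\n",
               (long long)inode.i_ctime.tv_sec,
               (long long)inode.i_mtime.tv_sec,
               (long long)inode.i_atime.tv_sec);
        return 0;
}
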
index 48f58c6..348783a 100644 (file)
@@ -421,10 +421,9 @@ repeat:
                        /* Rock ridge never appears on a High Sierra disk */
                        cnt = 0;
                        if (rr->u.TF.flags & TF_CREATE) {
-                               inode->i_ctime.tv_sec =
-                                   iso_date(rr->u.TF.times[cnt++].time,
-                                            0);
-                               inode->i_ctime.tv_nsec = 0;
+                               inode_set_ctime(inode,
+                                               iso_date(rr->u.TF.times[cnt++].time, 0),
+                                               0);
                        }
                        if (rr->u.TF.flags & TF_MODIFY) {
                                inode->i_mtime.tv_sec =
@@ -439,10 +438,9 @@ repeat:
                                inode->i_atime.tv_nsec = 0;
                        }
                        if (rr->u.TF.flags & TF_ATTRIBUTES) {
-                               inode->i_ctime.tv_sec =
-                                   iso_date(rr->u.TF.times[cnt++].time,
-                                            0);
-                               inode->i_ctime.tv_nsec = 0;
+                               inode_set_ctime(inode,
+                                               iso_date(rr->u.TF.times[cnt++].time, 0),
+                                               0);
                        }
                        break;
                case SIG('S', 'L'):
@@ -534,7 +532,7 @@ repeat:
                        inode->i_size = reloc->i_size;
                        inode->i_blocks = reloc->i_blocks;
                        inode->i_atime = reloc->i_atime;
-                       inode->i_ctime = reloc->i_ctime;
+                       inode_set_ctime_to_ts(inode, inode_get_ctime(reloc));
                        inode->i_mtime = reloc->i_mtime;
                        iput(reloc);
                        break;
index 51bd38d..9ec9101 100644 (file)
@@ -27,7 +27,7 @@
  *
  * Called with j_list_lock held.
  */
-static inline void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
 {
        transaction_t *transaction = jh->b_cp_transaction;
 
@@ -41,45 +41,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
 }
 
 /*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-static inline void __buffer_unlink(struct journal_head *jh)
-{
-       transaction_t *transaction = jh->b_cp_transaction;
-
-       __buffer_unlink_first(jh);
-       if (transaction->t_checkpoint_io_list == jh) {
-               transaction->t_checkpoint_io_list = jh->b_cpnext;
-               if (transaction->t_checkpoint_io_list == jh)
-                       transaction->t_checkpoint_io_list = NULL;
-       }
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
-       transaction_t *transaction = jh->b_cp_transaction;
-
-       __buffer_unlink_first(jh);
-
-       if (!transaction->t_checkpoint_io_list) {
-               jh->b_cpnext = jh->b_cpprev = jh;
-       } else {
-               jh->b_cpnext = transaction->t_checkpoint_io_list;
-               jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
-               jh->b_cpprev->b_cpnext = jh;
-               jh->b_cpnext->b_cpprev = jh;
-       }
-       transaction->t_checkpoint_io_list = jh;
-}
-
-/*
  * Check whether a checkpoint buffer can be released or not.
  *
  * Requires j_list_lock
@@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count)
                struct buffer_head *bh = journal->j_chkpt_bhs[i];
                BUFFER_TRACE(bh, "brelse");
                __brelse(bh);
+               journal->j_chkpt_bhs[i] = NULL;
        }
        *batch_count = 0;
 }
@@ -242,15 +204,6 @@ restart:
                jh = transaction->t_checkpoint_list;
                bh = jh2bh(jh);
 
-               if (buffer_locked(bh)) {
-                       get_bh(bh);
-                       spin_unlock(&journal->j_list_lock);
-                       wait_on_buffer(bh);
-                       /* the journal_head may have gone by now */
-                       BUFFER_TRACE(bh, "brelse");
-                       __brelse(bh);
-                       goto retry;
-               }
                if (jh->b_transaction != NULL) {
                        transaction_t *t = jh->b_transaction;
                        tid_t tid = t->t_tid;
@@ -285,30 +238,50 @@ restart:
                        spin_lock(&journal->j_list_lock);
                        goto restart;
                }
-               if (!buffer_dirty(bh)) {
+               if (!trylock_buffer(bh)) {
+                       /*
+                        * The buffer is locked: it may be under writeback,
+                        * have been flushed out in the last couple of
+                        * cycles, or be getting re-added to a new
+                        * transaction. Check it again once it's unlocked.
+                        */
+                       get_bh(bh);
+                       spin_unlock(&journal->j_list_lock);
+                       wait_on_buffer(bh);
+                       /* the journal_head may have gone by now */
+                       BUFFER_TRACE(bh, "brelse");
+                       __brelse(bh);
+                       goto retry;
+               } else if (!buffer_dirty(bh)) {
+                       unlock_buffer(bh);
                        BUFFER_TRACE(bh, "remove from checkpoint");
-                       if (__jbd2_journal_remove_checkpoint(jh))
-                               /* The transaction was released; we're done */
+                       /*
+                        * If the transaction was released or the checkpoint
+                        * list was empty, we're done.
+                        */
+                       if (__jbd2_journal_remove_checkpoint(jh) ||
+                           !transaction->t_checkpoint_list)
                                goto out;
-                       continue;
+               } else {
+                       unlock_buffer(bh);
+                       /*
+                        * We are about to write the buffer; it could be
+                        * raced by another transaction's shrink or buffer
+                        * re-log logic once we release the j_list_lock, so
+                        * leave it on the checkpoint list and check its
+                        * status again to make sure it's clean.
+                        */
+                       BUFFER_TRACE(bh, "queue");
+                       get_bh(bh);
+                       J_ASSERT_BH(bh, !buffer_jwrite(bh));
+                       journal->j_chkpt_bhs[batch_count++] = bh;
+                       transaction->t_chp_stats.cs_written++;
+                       transaction->t_checkpoint_list = jh->b_cpnext;
                }
-               /*
-                * Important: we are about to write the buffer, and
-                * possibly block, while still holding the journal
-                * lock.  We cannot afford to let the transaction
-                * logic start messing around with this buffer before
-                * we write it to disk, as that would break
-                * recoverability.
-                */
-               BUFFER_TRACE(bh, "queue");
-               get_bh(bh);
-               J_ASSERT_BH(bh, !buffer_jwrite(bh));
-               journal->j_chkpt_bhs[batch_count++] = bh;
-               __buffer_relink_io(jh);
-               transaction->t_chp_stats.cs_written++;
+
                if ((batch_count == JBD2_NR_BATCH) ||
-                   need_resched() ||
-                   spin_needbreak(&journal->j_list_lock))
+                   need_resched() || spin_needbreak(&journal->j_list_lock) ||
+                   jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
                        goto unlock_and_flush;
        }
 
@@ -322,38 +295,6 @@ restart:
                        goto restart;
        }
 
-       /*
-        * Now we issued all of the transaction's buffers, let's deal
-        * with the buffers that are out for I/O.
-        */
-restart2:
-       /* Did somebody clean up the transaction in the meanwhile? */
-       if (journal->j_checkpoint_transactions != transaction ||
-           transaction->t_tid != this_tid)
-               goto out;
-
-       while (transaction->t_checkpoint_io_list) {
-               jh = transaction->t_checkpoint_io_list;
-               bh = jh2bh(jh);
-               if (buffer_locked(bh)) {
-                       get_bh(bh);
-                       spin_unlock(&journal->j_list_lock);
-                       wait_on_buffer(bh);
-                       /* the journal_head may have gone by now */
-                       BUFFER_TRACE(bh, "brelse");
-                       __brelse(bh);
-                       spin_lock(&journal->j_list_lock);
-                       goto restart2;
-               }
-
-               /*
-                * Now in whatever state the buffer currently is, we
-                * know that it has been written out and so we can
-                * drop it from the list
-                */
-               if (__jbd2_journal_remove_checkpoint(jh))
-                       break;
-       }
 out:
        spin_unlock(&journal->j_list_lock);
        result = jbd2_cleanup_journal_tail(journal);
@@ -409,49 +350,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
 /* Checkpoint list management */
 
 /*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and
- * release them. If 'destroy' is set, clean all buffers unconditionally.
- *
- * Called with j_list_lock held.
- * Returns 1 if we freed the transaction, 0 otherwise.
- */
-static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
-{
-       struct journal_head *last_jh;
-       struct journal_head *next_jh = jh;
-
-       if (!jh)
-               return 0;
-
-       last_jh = jh->b_cpprev;
-       do {
-               jh = next_jh;
-               next_jh = jh->b_cpnext;
-
-               if (!destroy && __cp_buffer_busy(jh))
-                       return 0;
-
-               if (__jbd2_journal_remove_checkpoint(jh))
-                       return 1;
-               /*
-                * This function only frees up some memory
-                * if possible so we dont have an obligation
-                * to finish processing. Bail out if preemption
-                * requested:
-                */
-               if (need_resched())
-                       return 0;
-       } while (jh != last_jh);
-
-       return 0;
-}
-
-/*
  * journal_shrink_one_cp_list
  *
- * Find 'nr_to_scan' written-back checkpoint buffers in the given list
+ * Find all the written-back checkpoint buffers in the given list
  * and try to release them. If the whole transaction is released, set
  * the 'released' parameter. Return the number of released checkpointed
  * buffers.
@@ -459,15 +360,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
  * Called with j_list_lock held.
  */
 static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
-                                               unsigned long *nr_to_scan,
-                                               bool *released)
+                                               bool destroy, bool *released)
 {
        struct journal_head *last_jh;
        struct journal_head *next_jh = jh;
        unsigned long nr_freed = 0;
        int ret;
 
-       if (!jh || *nr_to_scan == 0)
+       *released = false;
+       if (!jh)
                return 0;
 
        last_jh = jh->b_cpprev;
@@ -475,12 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
                jh = next_jh;
                next_jh = jh->b_cpnext;
 
-               (*nr_to_scan)--;
-               if (__cp_buffer_busy(jh))
-                       continue;
+               if (destroy) {
+                       ret = __jbd2_journal_remove_checkpoint(jh);
+               } else {
+                       ret = jbd2_journal_try_remove_checkpoint(jh);
+                       if (ret < 0)
+                               continue;
+               }
 
                nr_freed++;
-               ret = __jbd2_journal_remove_checkpoint(jh);
                if (ret) {
                        *released = true;
                        break;
@@ -488,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
 
                if (need_resched())
                        break;
-       } while (jh != last_jh && *nr_to_scan);
+       } while (jh != last_jh);
 
        return nr_freed;
 }
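
journal_shrink_one_cp_list walks a circular list whose nodes, including the one it started from, may be freed mid-walk, so it captures the end marker up front and stops either there or when the list empties. A self-contained model of that bounded circular walk (simplified, with invented types):

#include <stdio.h>
#include <stdlib.h>

struct jh {
        int busy;
        struct jh *next, *prev;
};

/* Walk one full lap of a circular list, freeing every non-busy node.
 * The end marker is captured up front because the node we start from
 * may itself be freed mid-walk. */
static unsigned long shrink_cp_list(struct jh **head)
{
        struct jh *jh = *head, *last, *next;
        unsigned long freed = 0;

        if (!jh)
                return 0;
        last = jh->prev;
        do {
                next = jh->next;
                if (!jh->busy) {
                        jh->prev->next = jh->next;
                        jh->next->prev = jh->prev;
                        if (*head == jh)
                                *head = (jh->next == jh) ? NULL : jh->next;
                        int was_last = (jh == last);
                        free(jh);
                        freed++;
                        if (was_last || !*head)
                                break;
                } else if (jh == last) {
                        break;
                }
                jh = next;
        } while (1);

        return freed;
}

static struct jh *push(struct jh *head, int busy)
{
        struct jh *jh = malloc(sizeof(*jh));
        jh->busy = busy;
        if (!head) {
                jh->next = jh->prev = jh;
                return jh;
        }
        jh->next = head;
        jh->prev = head->prev;
        head->prev->next = jh;
        head->prev = jh;
        return head;
}

int main(void)
{
        struct jh *head = NULL;
        head = push(head, 0);
        head = push(head, 1);
        head = push(head, 0);
        printf("freed %lu buffers\n", shrink_cp_list(&head));
        printf("list %s\n", head ? "still has busy buffers" : "empty");
        return 0;
}
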
@@ -506,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
                                                  unsigned long *nr_to_scan)
 {
        transaction_t *transaction, *last_transaction, *next_transaction;
-       bool released;
+       bool __maybe_unused released;
        tid_t first_tid = 0, last_tid = 0, next_tid = 0;
        tid_t tid = 0;
        unsigned long nr_freed = 0;
-       unsigned long nr_scanned = *nr_to_scan;
+       unsigned long freed;
 
 again:
        spin_lock(&journal->j_list_lock);
@@ -539,19 +443,11 @@ again:
                transaction = next_transaction;
                next_transaction = transaction->t_cpnext;
                tid = transaction->t_tid;
-               released = false;
-
-               nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
-                                                      nr_to_scan, &released);
-               if (*nr_to_scan == 0)
-                       break;
-               if (need_resched() || spin_needbreak(&journal->j_list_lock))
-                       break;
-               if (released)
-                       continue;
 
-               nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
-                                                      nr_to_scan, &released);
+               freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+                                                  false, &released);
+               nr_freed += freed;
+               (*nr_to_scan) -= min(*nr_to_scan, freed);
                if (*nr_to_scan == 0)
                        break;
                if (need_resched() || spin_needbreak(&journal->j_list_lock))
@@ -572,9 +468,8 @@ again:
        if (*nr_to_scan && next_tid)
                goto again;
 out:
-       nr_scanned -= *nr_to_scan;
        trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
-                                         nr_freed, nr_scanned, next_tid);
+                                         nr_freed, next_tid);
 
        return nr_freed;
 }
@@ -590,7 +485,7 @@ out:
 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 {
        transaction_t *transaction, *last_transaction, *next_transaction;
-       int ret;
+       bool released;
 
        transaction = journal->j_checkpoint_transactions;
        if (!transaction)
@@ -601,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
        do {
                transaction = next_transaction;
                next_transaction = transaction->t_cpnext;
-               ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
-                                               destroy);
+               journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+                                          destroy, &released);
                /*
                 * This function only frees up some memory if possible so we
                 * don't have an obligation to finish processing. Bail out if
@@ -610,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
                 */
                if (need_resched())
                        return;
-               if (ret)
-                       continue;
-               /*
-                * It is essential that we are as careful as in the case of
-                * t_checkpoint_list with removing the buffer from the list as
-                * we can possibly see not yet submitted buffers on io_list
-                */
-               ret = journal_clean_one_cp_list(transaction->
-                               t_checkpoint_io_list, destroy);
-               if (need_resched())
-                       return;
                /*
                 * Stop scanning if we couldn't free the transaction. This
                 * avoids pointless scanning of transactions which still
                 * weren't checkpointed.
                 */
-               if (!ret)
+               if (!released)
                        return;
        } while (transaction != last_transaction);
 }
@@ -705,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
        jbd2_journal_put_journal_head(jh);
 
        /* Is this transaction empty? */
-       if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+       if (transaction->t_checkpoint_list)
                return 0;
 
        /*
@@ -737,6 +621,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
 }
 
 /*
+ * Check the checkpoint buffer and try to remove it from the checkpoint
+ * list if it's clean. Returns -EBUSY if it is not clean, 1 if removing
+ * it freed the transaction, and 0 otherwise.
+ *
+ * This function is called with j_list_lock held.
+ */
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
+{
+       struct buffer_head *bh = jh2bh(jh);
+
+       if (!trylock_buffer(bh))
+               return -EBUSY;
+       if (buffer_dirty(bh)) {
+               unlock_buffer(bh);
+               return -EBUSY;
+       }
+       unlock_buffer(bh);
+
+       /*
+        * Buffer is clean and the IO has finished (we held the buffer
+        * lock) so the checkpoint is done. We can safely remove the
+        * buffer from this transaction.
+        */
+       JBUFFER_TRACE(jh, "remove from checkpoint list");
+       return __jbd2_journal_remove_checkpoint(jh);
+}
+
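jbd2_journal_try_remove_checkpoint encodes the new non-blocking idiom: trylock the buffer, return -EBUSY if it is locked or still dirty, and only remove it from the checkpoint list once it is provably clean. A userspace analogue of the trylock-then-test pattern using pthreads (journaling specifics elided; link with -lpthread):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_buffer {
        pthread_mutex_t lock;   /* stands in for the buffer lock */
        bool dirty;             /* stands in for buffer_dirty() */
};

/* Non-blocking: returns -EBUSY rather than waiting, so callers that
 * hold other locks (j_list_lock in the patch) never block here. */
static int try_remove(struct toy_buffer *b)
{
        if (pthread_mutex_trylock(&b->lock) != 0)
                return -EBUSY;          /* under IO or being relogged */
        if (b->dirty) {
                pthread_mutex_unlock(&b->lock);
                return -EBUSY;          /* not written back yet */
        }
        pthread_mutex_unlock(&b->lock);
        /* clean and unlocked: safe to drop from the checkpoint list */
        return 0;
}

int main(void)
{
        struct toy_buffer clean = { PTHREAD_MUTEX_INITIALIZER, false };
        struct toy_buffer dirty = { PTHREAD_MUTEX_INITIALIZER, true };

        printf("clean: %d, dirty: %d\n", try_remove(&clean),
               try_remove(&dirty));
        return 0;
}
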
+/*
  * journal_insert_checkpoint: put a committed buffer onto a checkpoint
  * list so that we know when it is safe to clean the transaction out of
  * the log.
@@ -797,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
        J_ASSERT(transaction->t_forget == NULL);
        J_ASSERT(transaction->t_shadow_list == NULL);
        J_ASSERT(transaction->t_checkpoint_list == NULL);
-       J_ASSERT(transaction->t_checkpoint_io_list == NULL);
        J_ASSERT(atomic_read(&transaction->t_updates) == 0);
        J_ASSERT(journal->j_committing_transaction != transaction);
        J_ASSERT(journal->j_running_transaction != transaction);
index b33155d..1073259 100644 (file)
@@ -1141,8 +1141,7 @@ restart_loop:
        spin_lock(&journal->j_list_lock);
        commit_transaction->t_state = T_FINISHED;
        /* Check if the transaction can be dropped now that we are finished */
-       if (commit_transaction->t_checkpoint_list == NULL &&
-           commit_transaction->t_checkpoint_io_list == NULL) {
+       if (commit_transaction->t_checkpoint_list == NULL) {
                __jbd2_journal_drop_transaction(journal, commit_transaction);
                jbd2_journal_free_transaction(commit_transaction);
        }
index 1861124..4d1fda1 100644 (file)
@@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
                 * Otherwise, if the buffer has been written to disk,
                 * it is safe to remove the checkpoint and drop it.
                 */
-               if (!buffer_dirty(bh)) {
-                       __jbd2_journal_remove_checkpoint(jh);
+               if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
                        spin_unlock(&journal->j_list_lock);
                        goto drop;
                }
@@ -2100,35 +2099,6 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
        __brelse(bh);
 }
 
-/*
- * Called from jbd2_journal_try_to_free_buffers().
- *
- * Called under jh->b_state_lock
- */
-static void
-__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
-{
-       struct journal_head *jh;
-
-       jh = bh2jh(bh);
-
-       if (buffer_locked(bh) || buffer_dirty(bh))
-               goto out;
-
-       if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
-               goto out;
-
-       spin_lock(&journal->j_list_lock);
-       if (jh->b_cp_transaction != NULL) {
-               /* written-back checkpointed metadata buffer */
-               JBUFFER_TRACE(jh, "remove from checkpoint list");
-               __jbd2_journal_remove_checkpoint(jh);
-       }
-       spin_unlock(&journal->j_list_lock);
-out:
-       return;
-}
-
 /**
  * jbd2_journal_try_to_free_buffers() - try to free page buffers.
  * @journal: journal for operation
@@ -2186,7 +2156,13 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
                        continue;
 
                spin_lock(&jh->b_state_lock);
-               __journal_try_to_free_buffer(journal, bh);
+               if (!jh->b_transaction && !jh->b_next_transaction) {
+                       spin_lock(&journal->j_list_lock);
+                       /* Remove written-back checkpointed metadata buffer */
+                       if (jh->b_cp_transaction != NULL)
+                               jbd2_journal_try_remove_checkpoint(jh);
+                       spin_unlock(&journal->j_list_lock);
+               }
                spin_unlock(&jh->b_state_lock);
                jbd2_journal_put_journal_head(jh);
                if (buffer_jbd(bh))
index 5075a0a..091ab0e 100644 (file)
@@ -204,7 +204,8 @@ static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i,
        if (ret)
                goto fail;
 
-       dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+       dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+                                              ITIME(je32_to_cpu(ri->ctime)));
 
        jffs2_free_raw_inode(ri);
 
@@ -237,7 +238,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
        if (dead_f->inocache)
                set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink);
        if (!ret)
-               dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+               dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
        return ret;
 }
 /***********************************************************************/
@@ -271,7 +272,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
                set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink);
                mutex_unlock(&f->sem);
                d_instantiate(dentry, d_inode(old_dentry));
-               dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+               dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
                ihold(d_inode(old_dentry));
        }
        return ret;
@@ -422,7 +423,8 @@ static int jffs2_symlink (struct mnt_idmap *idmap, struct inode *dir_i,
                goto fail;
        }
 
-       dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+       dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+                                              ITIME(je32_to_cpu(rd->mctime)));
 
        jffs2_free_raw_dirent(rd);
 
@@ -566,7 +568,8 @@ static int jffs2_mkdir (struct mnt_idmap *idmap, struct inode *dir_i,
                goto fail;
        }
 
-       dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+       dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+                                              ITIME(je32_to_cpu(rd->mctime)));
        inc_nlink(dir_i);
 
        jffs2_free_raw_dirent(rd);
@@ -607,7 +610,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
        ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
                              dentry->d_name.len, f, now);
        if (!ret) {
-               dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+               dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
                clear_nlink(d_inode(dentry));
                drop_nlink(dir_i);
        }
@@ -743,7 +746,8 @@ static int jffs2_mknod (struct mnt_idmap *idmap, struct inode *dir_i,
                goto fail;
        }
 
-       dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+       dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+                                              ITIME(je32_to_cpu(rd->mctime)));
 
        jffs2_free_raw_dirent(rd);
 
@@ -864,14 +868,16 @@ static int jffs2_rename (struct mnt_idmap *idmap,
                 * caller won't do it on its own since we are returning an error.
                 */
                d_invalidate(new_dentry);
-               new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
+               new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i,
+                                                          ITIME(now));
                return ret;
        }
 
        if (d_is_dir(old_dentry))
                drop_nlink(old_dir_i);
 
-       new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now);
+       old_dir_i->i_mtime = inode_set_ctime_to_ts(old_dir_i, ITIME(now));
+       new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i, ITIME(now));
 
        return 0;
 }
index 2345ca3..11c6679 100644 (file)
@@ -317,7 +317,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
                        inode->i_size = pos + writtenlen;
                        inode->i_blocks = (inode->i_size + 511) >> 9;
 
-                       inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
+                       inode->i_mtime = inode_set_ctime_to_ts(inode,
+                                                              ITIME(je32_to_cpu(ri->ctime)));
                }
        }
 
index 038516b..0403efa 100644 (file)
@@ -115,7 +115,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
        ri->isize = cpu_to_je32((ivalid & ATTR_SIZE)?iattr->ia_size:inode->i_size);
        ri->atime = cpu_to_je32(I_SEC((ivalid & ATTR_ATIME)?iattr->ia_atime:inode->i_atime));
        ri->mtime = cpu_to_je32(I_SEC((ivalid & ATTR_MTIME)?iattr->ia_mtime:inode->i_mtime));
-       ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode->i_ctime));
+       ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode_get_ctime(inode)));
 
        ri->offset = cpu_to_je32(0);
        ri->csize = ri->dsize = cpu_to_je32(mdatalen);
@@ -148,7 +148,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
        }
        /* It worked. Update the inode */
        inode->i_atime = ITIME(je32_to_cpu(ri->atime));
-       inode->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+       inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(ri->ctime)));
        inode->i_mtime = ITIME(je32_to_cpu(ri->mtime));
        inode->i_mode = jemode_to_cpu(ri->mode);
        i_uid_write(inode, je16_to_cpu(ri->uid));
@@ -284,7 +284,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
        inode->i_size = je32_to_cpu(latest_node.isize);
        inode->i_atime = ITIME(je32_to_cpu(latest_node.atime));
        inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
-       inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
+       inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(latest_node.ctime)));
 
        set_nlink(inode, f->inocache->pino_nlink);
 
@@ -388,7 +388,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags)
        iattr.ia_gid = inode->i_gid;
        iattr.ia_atime = inode->i_atime;
        iattr.ia_mtime = inode->i_mtime;
-       iattr.ia_ctime = inode->i_ctime;
+       iattr.ia_ctime = inode_get_ctime(inode);
 
        jffs2_do_setattr(inode, &iattr);
 }
@@ -475,7 +475,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
        inode->i_mode = jemode_to_cpu(ri->mode);
        i_gid_write(inode, je16_to_cpu(ri->gid));
        i_uid_write(inode, je16_to_cpu(ri->uid));
-       inode->i_atime = inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
 
        inode->i_blocks = 0;
index 8da1976..50727a1 100644 (file)
@@ -35,7 +35,7 @@ struct kvec;
 #define ITIME(sec) ((struct timespec64){sec, 0})
 #define JFFS2_NOW() JFFS2_CLAMP_TIME(ktime_get_real_seconds())
 #define I_SEC(tv) JFFS2_CLAMP_TIME((tv).tv_sec)
-#define JFFS2_F_I_CTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_ctime)
+#define JFFS2_F_I_CTIME(f) I_SEC(inode_get_ctime(OFNI_EDONI_2SFFJ(f)))
 #define JFFS2_F_I_MTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_mtime)
 #define JFFS2_F_I_ATIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_atime)
 #define sleep_on_spinunlock(wq, s)                             \
index fb96f87..1de3602 100644 (file)
@@ -116,7 +116,7 @@ int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
        if (!rc) {
                if (update_mode) {
                        inode->i_mode = mode;
-                       inode->i_ctime = current_time(inode);
+                       inode_set_ctime_current(inode);
                        mark_inode_dirty(inode);
                }
                rc = txCommit(tid, 1, &inode, 0);
index 8ac10e3..920d58a 100644 (file)
@@ -393,7 +393,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
                        break;
                }
 
-               ip->i_mtime = ip->i_ctime = current_time(ip);
+               ip->i_mtime = inode_set_ctime_current(ip);
                mark_inode_dirty(ip);
 
                txCommit(tid, 1, &ip, 0);
index ed7989b..f7bd7e8 100644 (file)
@@ -96,7 +96,7 @@ int jfs_fileattr_set(struct mnt_idmap *idmap,
        jfs_inode->mode2 = flags;
 
        jfs_set_inode_flags(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        return 0;
index 390cbfc..a40383a 100644 (file)
@@ -3064,8 +3064,8 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
        ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
        ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
        ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
-       ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
-       ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
+       inode_set_ctime(ip, le32_to_cpu(dip->di_ctime.tv_sec),
+                       le32_to_cpu(dip->di_ctime.tv_nsec));
        ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
        ip->i_generation = le32_to_cpu(dip->di_gen);
 
@@ -3139,8 +3139,8 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
 
        dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
        dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
-       dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
-       dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
+       dip->di_ctime.tv_sec = cpu_to_le32(inode_get_ctime(ip).tv_sec);
+       dip->di_ctime.tv_nsec = cpu_to_le32(inode_get_ctime(ip).tv_nsec);
        dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
        dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
        dip->di_ixpxd = jfs_ip->ixpxd;  /* in-memory pxd's are little-endian */
index 9e1f027..87594ef 100644 (file)
@@ -97,8 +97,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
        jfs_inode->mode2 |= inode->i_mode;
 
        inode->i_blocks = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
-       jfs_inode->otime = inode->i_ctime.tv_sec;
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
+       jfs_inode->otime = inode_get_ctime(inode).tv_sec;
        inode->i_generation = JFS_SBI(sb)->gengen++;
 
        jfs_inode->cflag = 0;
index 9b03029..029d470 100644 (file)
@@ -149,7 +149,7 @@ static int jfs_create(struct mnt_idmap *idmap, struct inode *dip,
 
        mark_inode_dirty(ip);
 
-       dip->i_ctime = dip->i_mtime = current_time(dip);
+       dip->i_mtime = inode_set_ctime_current(dip);
 
        mark_inode_dirty(dip);
 
@@ -284,7 +284,7 @@ static int jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip,
 
        /* update parent directory inode */
        inc_nlink(dip);         /* for '..' from child directory */
-       dip->i_ctime = dip->i_mtime = current_time(dip);
+       dip->i_mtime = inode_set_ctime_current(dip);
        mark_inode_dirty(dip);
 
        rc = txCommit(tid, 2, &iplist[0], 0);
@@ -390,7 +390,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
        /* update parent directory's link count corresponding
         * to ".." entry of the target directory deleted
         */
-       dip->i_ctime = dip->i_mtime = current_time(dip);
+       dip->i_mtime = inode_set_ctime_current(dip);
        inode_dec_link_count(dip);
 
        /*
@@ -512,7 +512,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
 
        ASSERT(ip->i_nlink);
 
-       ip->i_ctime = dip->i_ctime = dip->i_mtime = current_time(ip);
+       dip->i_mtime = inode_set_ctime_to_ts(dip, inode_set_ctime_current(ip));
        mark_inode_dirty(dip);
 
        /* update target's inode */
@@ -827,8 +827,8 @@ static int jfs_link(struct dentry *old_dentry,
 
        /* update object inode */
        inc_nlink(ip);          /* for new link */
-       ip->i_ctime = current_time(ip);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       inode_set_ctime_current(ip);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        ihold(ip);
 
@@ -1028,7 +1028,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
 
        mark_inode_dirty(ip);
 
-       dip->i_ctime = dip->i_mtime = current_time(dip);
+       dip->i_mtime = inode_set_ctime_current(dip);
        mark_inode_dirty(dip);
        /*
         * commit update of parent directory and link object
@@ -1205,7 +1205,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                        tblk->xflag |= COMMIT_DELETE;
                        tblk->u.ip = new_ip;
                } else {
-                       new_ip->i_ctime = current_time(new_ip);
+                       inode_set_ctime_current(new_ip);
                        mark_inode_dirty(new_ip);
                }
        } else {
@@ -1268,10 +1268,10 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
        /*
         * Update ctime on changed/moved inodes & mark dirty
         */
-       old_ip->i_ctime = current_time(old_ip);
+       inode_set_ctime_current(old_ip);
        mark_inode_dirty(old_ip);
 
-       new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
+       new_dir->i_mtime = inode_set_ctime_current(new_dir);
        mark_inode_dirty(new_dir);
 
        /* Build list of inodes modified by this transaction */
@@ -1283,7 +1283,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
        if (old_dir != new_dir) {
                iplist[ipcount++] = new_dir;
-               old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+               old_dir->i_mtime = inode_set_ctime_current(old_dir);
                mark_inode_dirty(old_dir);
        }
 
@@ -1416,7 +1416,7 @@ static int jfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
 
        mark_inode_dirty(ip);
 
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        mark_inode_dirty(dir);
 
@@ -1535,9 +1535,10 @@ const struct inode_operations jfs_dir_inode_operations = {
 #endif
 };
 
+WRAP_DIR_ITER(jfs_readdir) // FIXME!
 const struct file_operations jfs_dir_operations = {
        .read           = generic_read_dir,
-       .iterate        = jfs_readdir,
+       .iterate_shared = shared_jfs_readdir,
        .fsync          = jfs_fsync,
        .unlocked_ioctl = jfs_ioctl,
        .compat_ioctl   = compat_ptr_ioctl,
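WRAP_DIR_ITER() is part of this cycle's retirement of the old ->iterate() method: it generates the shared_jfs_readdir() installed as ->iterate_shared just above. A hedged sketch of what the macro expands to, assuming the 6.6 <linux/fs.h> definitions:

	/* Approximate expansion of WRAP_DIR_ITER(jfs_readdir): */
	static int shared_jfs_readdir(struct file *file, struct dir_context *ctx)
	{
		/* wrap_directory_iterator() drops the shared i_rwsem, takes it
		 * exclusive around the legacy iterator, then re-takes it shared;
		 * hence the FIXME until jfs_readdir tolerates shared iteration */
		return wrap_directory_iterator(file, ctx, jfs_readdir);
	}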
index d2f82cb..2e2f7f6 100644 (file)
@@ -818,7 +818,7 @@ out:
        }
        if (inode->i_size < off+len-towrite)
                i_size_write(inode, off+len-towrite);
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        inode_unlock(inode);
        return len - towrite;
index 931e500..8577ad4 100644 (file)
@@ -647,7 +647,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
        if (old_blocks)
                dquot_free_block(inode, old_blocks);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
 
        return 0;
 }
index 5d82627..c429c42 100644 (file)
@@ -8,16 +8,16 @@
 /**
  * kernel_read_file() - read file contents into a kernel buffer
  *
- * @file       file to read from
- * @offset     where to start reading from (see below).
- * @buf                pointer to a "void *" buffer for reading into (if
+ * @file:      file to read from
+ * @offset:    where to start reading from (see below).
+ * @buf:       pointer to a "void *" buffer for reading into (if
  *             *@buf is NULL, a buffer will be allocated, and
  *             @buf_size will be ignored)
- * @buf_size   size of buf, if already allocated. If @buf not
+ * @buf_size:  size of buf, if already allocated. If @buf not
  *             allocated, this is the largest size to allocate.
- * @file_size  if non-NULL, the full size of @file will be
+ * @file_size: if non-NULL, the full size of @file will be
  *             written here.
- * @id         the kernel_read_file_id identifying the type of
+ * @id:                the kernel_read_file_id identifying the type of
  *             file contents being read (for LSMs to examine)
  *
  * @offset must be 0 unless both @buf and @file_size are non-NULL
index 5a1a4af..6609958 100644 (file)
@@ -556,7 +556,7 @@ void kernfs_put(struct kernfs_node *kn)
        kfree_const(kn->name);
 
        if (kn->iattr) {
-               simple_xattrs_free(&kn->iattr->xattrs);
+               simple_xattrs_free(&kn->iattr->xattrs, NULL);
                kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
        }
        spin_lock(&kernfs_idr_lock);
index b22b74d..922719a 100644 (file)
@@ -151,8 +151,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
 static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
 {
        inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime =
-               inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 }
 
 static inline void set_inode_attr(struct inode *inode,
@@ -162,7 +161,7 @@ static inline void set_inode_attr(struct inode *inode,
        inode->i_gid = attrs->ia_gid;
        inode->i_atime = attrs->ia_atime;
        inode->i_mtime = attrs->ia_mtime;
-       inode->i_ctime = attrs->ia_ctime;
+       inode_set_ctime_to_ts(inode, attrs->ia_ctime);
 }
 
 static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
@@ -191,7 +190,7 @@ int kernfs_iop_getattr(struct mnt_idmap *idmap,
 
        down_read(&root->kernfs_iattr_rwsem);
        kernfs_refresh_inode(kn, inode);
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        up_read(&root->kernfs_iattr_rwsem);
 
        return 0;
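generic_fillattr() gains a request_mask parameter in this series; callers thread through the mask they received in ->getattr so the helper can skip attributes that were not asked for (as I read the helper change, the STATX_CHANGE_COOKIE handling is the immediate beneficiary). Every conversion below has the same shape; foo_getattr() here is an invented name:

	static int foo_getattr(struct mnt_idmap *idmap, const struct path *path,
			       struct kstat *stat, u32 request_mask,
			       unsigned int query_flags)
	{
		/* forward the caller's mask instead of inventing one */
		generic_fillattr(&nop_mnt_idmap, request_mask,
				 d_inode(path->dentry), stat);
		return 0;
	}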
@@ -306,11 +305,17 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
 int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
                     const void *value, size_t size, int flags)
 {
+       struct simple_xattr *old_xattr;
        struct kernfs_iattrs *attrs = kernfs_iattrs(kn);
        if (!attrs)
                return -ENOMEM;
 
-       return simple_xattr_set(&attrs->xattrs, name, value, size, flags, NULL);
+       old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
+       if (IS_ERR(old_xattr))
+               return PTR_ERR(old_xattr);
+
+       simple_xattr_free(old_xattr);
+       return 0;
 }
 
 static int kernfs_vfs_xattr_get(const struct xattr_handler *handler,
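simple_xattr_set() loses its removed_size out-parameter and instead hands back the displaced entry, making the caller responsible for freeing it. A sketch of the new contract as the kernfs conversions above and below use it; xattrs, name, value, size and flags stand in for the caller's locals:

	struct simple_xattr *old_xattr;

	/* NULL: nothing was replaced; ERR_PTR(): nothing changed at all;
	 * otherwise: the prior entry, now owned by the caller */
	old_xattr = simple_xattr_set(xattrs, name, value, size, flags);
	if (IS_ERR(old_xattr))
		return PTR_ERR(old_xattr);
	if (old_xattr) {
		/* account old_xattr->size here if needed, then drop it */
		simple_xattr_free(old_xattr);
	}
	return 0;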
@@ -342,7 +347,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
 {
        atomic_t *sz = &kn->iattr->user_xattr_size;
        atomic_t *nr = &kn->iattr->nr_user_xattrs;
-       ssize_t removed_size;
+       struct simple_xattr *old_xattr;
        int ret;
 
        if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) {
@@ -355,13 +360,18 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
                goto dec_size_out;
        }
 
-       ret = simple_xattr_set(xattrs, full_name, value, size, flags,
-                              &removed_size);
-
-       if (!ret && removed_size >= 0)
-               size = removed_size;
-       else if (!ret)
+       old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
+       if (!old_xattr)
                return 0;
+
+       if (IS_ERR(old_xattr)) {
+               ret = PTR_ERR(old_xattr);
+               goto dec_size_out;
+       }
+
+       ret = 0;
+       size = old_xattr->size;
+       simple_xattr_free(old_xattr);
 dec_size_out:
        atomic_sub(size, sz);
 dec_count_out:
@@ -376,18 +386,19 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
 {
        atomic_t *sz = &kn->iattr->user_xattr_size;
        atomic_t *nr = &kn->iattr->nr_user_xattrs;
-       ssize_t removed_size;
-       int ret;
+       struct simple_xattr *old_xattr;
 
-       ret = simple_xattr_set(xattrs, full_name, value, size, flags,
-                              &removed_size);
+       old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
+       if (!old_xattr)
+               return 0;
 
-       if (removed_size >= 0) {
-               atomic_sub(removed_size, sz);
-               atomic_dec(nr);
-       }
+       if (IS_ERR(old_xattr))
+               return PTR_ERR(old_xattr);
 
-       return ret;
+       atomic_sub(old_xattr->size, sz);
+       atomic_dec(nr);
+       simple_xattr_free(old_xattr);
+       return 0;
 }
 
 static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
index 5b85131..da78eb6 100644 (file)
@@ -33,7 +33,7 @@ int simple_getattr(struct mnt_idmap *idmap, const struct path *path,
                   unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
        return 0;
 }
@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = {
 };
 EXPORT_SYMBOL(simple_dir_inode_operations);
 
+static void offset_set(struct dentry *dentry, u32 offset)
+{
+       dentry->d_fsdata = (void *)((uintptr_t)(offset));
+}
+
+static u32 dentry2offset(struct dentry *dentry)
+{
+       return (u32)((uintptr_t)(dentry->d_fsdata));
+}
+
+static struct lock_class_key simple_offset_xa_lock;
+
+/**
+ * simple_offset_init - initialize an offset_ctx
+ * @octx: directory offset map to be initialized
+ *
+ */
+void simple_offset_init(struct offset_ctx *octx)
+{
+       xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
+       lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
+
+       /* 0 is '.', 1 is '..', so always start with offset 2 */
+       octx->next_offset = 2;
+}
+
+/**
+ * simple_offset_add - Add an entry to a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: new dentry being added
+ *
+ * Returns zero on success. @octx and the dentry's offset are updated.
+ * Otherwise, a negative errno value is returned.
+ */
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
+{
+       static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
+       u32 offset;
+       int ret;
+
+       if (dentry2offset(dentry) != 0)
+               return -EBUSY;
+
+       ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
+                             &octx->next_offset, GFP_KERNEL);
+       if (ret < 0)
+               return ret;
+
+       offset_set(dentry, offset);
+       return 0;
+}
+
+/**
+ * simple_offset_remove - Remove an entry from a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: dentry being removed
+ *
+ */
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
+{
+       u32 offset;
+
+       offset = dentry2offset(dentry);
+       if (offset == 0)
+               return;
+
+       xa_erase(&octx->xa, offset);
+       offset_set(dentry, 0);
+}
+
+/**
+ * simple_offset_rename_exchange - exchange rename with directory offsets
+ * @old_dir: parent of dentry being moved
+ * @old_dentry: dentry being moved
+ * @new_dir: destination parent
+ * @new_dentry: destination dentry
+ *
+ * Returns zero on success. Otherwise a negative errno is returned and the
+ * rename is rolled back.
+ */
+int simple_offset_rename_exchange(struct inode *old_dir,
+                                 struct dentry *old_dentry,
+                                 struct inode *new_dir,
+                                 struct dentry *new_dentry)
+{
+       struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
+       struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
+       u32 old_index = dentry2offset(old_dentry);
+       u32 new_index = dentry2offset(new_dentry);
+       int ret;
+
+       simple_offset_remove(old_ctx, old_dentry);
+       simple_offset_remove(new_ctx, new_dentry);
+
+       ret = simple_offset_add(new_ctx, old_dentry);
+       if (ret)
+               goto out_restore;
+
+       ret = simple_offset_add(old_ctx, new_dentry);
+       if (ret) {
+               simple_offset_remove(new_ctx, old_dentry);
+               goto out_restore;
+       }
+
+       ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+       if (ret) {
+               simple_offset_remove(new_ctx, old_dentry);
+               simple_offset_remove(old_ctx, new_dentry);
+               goto out_restore;
+       }
+       return 0;
+
+out_restore:
+       offset_set(old_dentry, old_index);
+       xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
+       offset_set(new_dentry, new_index);
+       xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
+       return ret;
+}
+
+/**
+ * simple_offset_destroy - Release offset map
+ * @octx: directory offset ctx that is about to be destroyed
+ *
+ * During fs teardown (e.g. umount), a directory's offset map might still
+ * contain entries. xa_destroy() cleans out anything that remains.
+ */
+void simple_offset_destroy(struct offset_ctx *octx)
+{
+       xa_destroy(&octx->xa);
+}
+
+/**
+ * offset_dir_llseek - Advance the read position of a directory descriptor
+ * @file: an open directory whose position is to be updated
+ * @offset: a byte offset
+ * @whence: enumerator describing the starting position for this update
+ *
+ * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
+ *
+ * Returns the updated read position if successful; otherwise a
+ * negative errno is returned and the read position remains unchanged.
+ */
+static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+       switch (whence) {
+       case SEEK_CUR:
+               offset += file->f_pos;
+               fallthrough;
+       case SEEK_SET:
+               if (offset >= 0)
+                       break;
+               fallthrough;
+       default:
+               return -EINVAL;
+       }
+
+       return vfs_setpos(file, offset, U32_MAX);
+}
+
+static struct dentry *offset_find_next(struct xa_state *xas)
+{
+       struct dentry *child, *found = NULL;
+
+       rcu_read_lock();
+       child = xas_next_entry(xas, U32_MAX);
+       if (!child)
+               goto out;
+       spin_lock(&child->d_lock);
+       if (simple_positive(child))
+               found = dget_dlock(child);
+       spin_unlock(&child->d_lock);
+out:
+       rcu_read_unlock();
+       return found;
+}
+
+static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+{
+       u32 offset = dentry2offset(dentry);
+       struct inode *inode = d_inode(dentry);
+
+       return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
+                         inode->i_ino, fs_umode_to_dtype(inode->i_mode));
+}
+
+static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+{
+       struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
+       XA_STATE(xas, &so_ctx->xa, ctx->pos);
+       struct dentry *dentry;
+
+       while (true) {
+               dentry = offset_find_next(&xas);
+               if (!dentry)
+                       break;
+
+               if (!offset_dir_emit(ctx, dentry)) {
+                       dput(dentry);
+                       break;
+               }
+
+               dput(dentry);
+               ctx->pos = xas.xa_index + 1;
+       }
+}
+
+/**
+ * offset_readdir - Emit entries starting at offset @ctx->pos
+ * @file: an open directory to iterate over
+ * @ctx: directory iteration context
+ *
+ * Caller must hold @file's i_rwsem to prevent insertion or removal of
+ * entries during this call.
+ *
+ * On entry, @ctx->pos contains an offset that represents the first entry
+ * to be read from the directory.
+ *
+ * The operation continues until there are no more entries to read, or
+ * until the ctx->actor indicates there is no more space in the caller's
+ * output buffer.
+ *
+ * On return, @ctx->pos contains an offset that will read the next entry
+ * in this directory when offset_readdir() is called again with @ctx.
+ *
+ * Return values:
+ *   %0 - Complete
+ */
+static int offset_readdir(struct file *file, struct dir_context *ctx)
+{
+       struct dentry *dir = file->f_path.dentry;
+
+       lockdep_assert_held(&d_inode(dir)->i_rwsem);
+
+       if (!dir_emit_dots(file, ctx))
+               return 0;
+
+       offset_iterate_dir(d_inode(dir), ctx);
+       return 0;
+}
+
+const struct file_operations simple_offset_dir_operations = {
+       .llseek         = offset_dir_llseek,
+       .iterate_shared = offset_readdir,
+       .read           = generic_read_dir,
+       .fsync          = noop_fsync,
+};
+
 static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
 {
        struct dentry *child = NULL;
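Taken together, the new helpers give a simple filesystem stable, xarray-backed directory offsets. A hedged sketch of how a consumer might wire them up; the foofs_* names are invented, and only ->get_offset_ctx, the simple_offset_* calls, and simple_offset_dir_operations come from this patch:

	struct foofs_inode {
		struct offset_ctx	dir_offsets;	/* one map per directory */
		struct inode		vfs_inode;
	};

	static struct offset_ctx *foofs_get_offset_ctx(struct inode *inode)
	{
		return &container_of(inode, struct foofs_inode,
				     vfs_inode)->dir_offsets;
	}

	static const struct inode_operations foofs_dir_inode_operations = {
		.lookup		= simple_lookup,
		.get_offset_ctx	= foofs_get_offset_ctx,
	};

	/* Lifecycle, in sketch form: simple_offset_init() when a directory
	 * inode is created; simple_offset_add() for each new child and
	 * simple_offset_remove() on unlink/rmdir;
	 * simple_offset_rename_exchange() for RENAME_EXCHANGE;
	 * simple_offset_destroy() at eviction; and the directory's
	 * file_operations become simple_offset_dir_operations. */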
@@ -275,7 +523,7 @@ void simple_recursive_removal(struct dentry *dentry,
                while ((child = find_next_child(this, victim)) == NULL) {
                        // kill and ascend
                        // update metadata while it's still locked
-                       inode->i_ctime = current_time(inode);
+                       inode_set_ctime_current(inode);
                        clear_nlink(inode);
                        inode_unlock(inode);
                        victim = this;
@@ -293,8 +541,7 @@ void simple_recursive_removal(struct dentry *dentry,
                                dput(victim);           // unpin it
                        }
                        if (victim == dentry) {
-                               inode->i_ctime = inode->i_mtime =
-                                       current_time(inode);
+                               inode->i_mtime = inode_set_ctime_current(inode);
                                if (d_is_dir(dentry))
                                        drop_nlink(inode);
                                inode_unlock(inode);
@@ -335,7 +582,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
         */
        root->i_ino = 1;
        root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
-       root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
+       root->i_atime = root->i_mtime = inode_set_ctime_current(root);
        s->s_root = d_make_root(root);
        if (!s->s_root)
                return -ENOMEM;
@@ -391,7 +638,8 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
 {
        struct inode *inode = d_inode(old_dentry);
 
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+       dir->i_mtime = inode_set_ctime_to_ts(dir,
+                                            inode_set_ctime_current(inode));
        inc_nlink(inode);
        ihold(inode);
        dget(dentry);
@@ -425,7 +673,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = d_inode(dentry);
 
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+       dir->i_mtime = inode_set_ctime_to_ts(dir,
+                                            inode_set_ctime_current(inode));
        drop_nlink(inode);
        dput(dentry);
        return 0;
@@ -444,6 +693,31 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL(simple_rmdir);
 
+/**
+ * simple_rename_timestamp - update the various inode timestamps for rename
+ * @old_dir: old parent directory
+ * @old_dentry: dentry that is being renamed
+ * @new_dir: new parent directory
+ * @new_dentry: target for rename
+ *
+ * POSIX mandates that the old and new parent directories have their ctime and
+ * mtime updated, and that the inodes of @old_dentry and @new_dentry (if any) have
+ * their ctime updated.
+ */
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+                            struct inode *new_dir, struct dentry *new_dentry)
+{
+       struct inode *newino = d_inode(new_dentry);
+
+       old_dir->i_mtime = inode_set_ctime_current(old_dir);
+       if (new_dir != old_dir)
+               new_dir->i_mtime = inode_set_ctime_current(new_dir);
+       inode_set_ctime_current(d_inode(old_dentry));
+       if (newino)
+               inode_set_ctime_current(newino);
+}
+EXPORT_SYMBOL_GPL(simple_rename_timestamp);
+
 int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry)
 {
@@ -459,11 +733,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
                        inc_nlink(old_dir);
                }
        }
-       old_dir->i_ctime = old_dir->i_mtime =
-       new_dir->i_ctime = new_dir->i_mtime =
-       d_inode(old_dentry)->i_ctime =
-       d_inode(new_dentry)->i_ctime = current_time(old_dir);
-
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
        return 0;
 }
 EXPORT_SYMBOL_GPL(simple_rename_exchange);
@@ -472,7 +742,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                  struct dentry *old_dentry, struct inode *new_dir,
                  struct dentry *new_dentry, unsigned int flags)
 {
-       struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = d_is_dir(old_dentry);
 
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
@@ -495,9 +764,7 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                inc_nlink(new_dir);
        }
 
-       old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
-               new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
-
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
        return 0;
 }
 EXPORT_SYMBOL(simple_rename);
@@ -548,21 +815,20 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len,
                        struct page **pagep, void **fsdata)
 {
-       struct page *page;
-       pgoff_t index;
-
-       index = pos >> PAGE_SHIFT;
+       struct folio *folio;
 
-       page = grab_cache_page_write_begin(mapping, index);
-       if (!page)
-               return -ENOMEM;
+       folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
+                       mapping_gfp_mask(mapping));
+       if (IS_ERR(folio))
+               return PTR_ERR(folio);
 
-       *pagep = page;
+       *pagep = &folio->page;
 
-       if (!PageUptodate(page) && (len != PAGE_SIZE)) {
-               unsigned from = pos & (PAGE_SIZE - 1);
+       if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
+               size_t from = offset_in_folio(folio, pos);
 
-               zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
+               folio_zero_segments(folio, 0, from,
+                               from + len, folio_size(folio));
        }
        return 0;
 }
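The write_begin conversion above also switches error conventions: __filemap_get_folio() reports failure with an ERR_PTR() rather than NULL, and FGP_WRITEBEGIN bundles the flags a ->write_begin wants. As I read the 6.6 pagemap headers (an assumption worth double-checking), that bundle is:

	/* assumption from the 6.6 headers, not from this diff: */
	#define FGP_WRITEBEGIN	(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)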
@@ -594,17 +860,18 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned copied,
                        struct page *page, void *fsdata)
 {
-       struct inode *inode = page->mapping->host;
+       struct folio *folio = page_folio(page);
+       struct inode *inode = folio->mapping->host;
        loff_t last_pos = pos + copied;
 
-       /* zero the stale part of the page if we did a short copy */
-       if (!PageUptodate(page)) {
+       /* zero the stale part of the folio if we did a short copy */
+       if (!folio_test_uptodate(folio)) {
                if (copied < len) {
-                       unsigned from = pos & (PAGE_SIZE - 1);
+                       size_t from = offset_in_folio(folio, pos);
 
-                       zero_user(page, from + copied, len - copied);
+                       folio_zero_range(folio, from + copied, len - copied);
                }
-               SetPageUptodate(page);
+               folio_mark_uptodate(folio);
        }
        /*
         * No need to use i_size_read() here, the i_size
@@ -613,9 +880,9 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
        if (last_pos > inode->i_size)
                i_size_write(inode, last_pos);
 
-       set_page_dirty(page);
-       unlock_page(page);
-       put_page(page);
+       folio_mark_dirty(folio);
+       folio_unlock(folio);
+       folio_put(folio);
 
        return copied;
 }
@@ -659,7 +926,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
         */
        inode->i_ino = 1;
        inode->i_mode = S_IFDIR | 0755;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_op = &simple_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        set_nlink(inode, 2);
@@ -685,7 +952,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
                        goto out;
                }
                inode->i_mode = S_IFREG | files->mode;
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inode->i_fop = files->ops;
                inode->i_ino = i;
                d_add(dentry, inode);
@@ -1253,7 +1520,7 @@ struct inode *alloc_anon_inode(struct super_block *s)
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
        inode->i_flags |= S_PRIVATE;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        return inode;
 }
 EXPORT_SYMBOL(alloc_anon_inode);
@@ -1269,7 +1536,7 @@ EXPORT_SYMBOL(alloc_anon_inode);
  * All arguments are ignored and it just returns -EINVAL.
  */
 int
-simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
+simple_nosetlease(struct file *filp, int arg, struct file_lock **flp,
                  void **priv)
 {
        return -EINVAL;
@@ -1315,7 +1582,7 @@ static int empty_dir_getattr(struct mnt_idmap *idmap,
                             u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        return 0;
 }
 
index df8b26a..a45efc1 100644 (file)
@@ -438,7 +438,7 @@ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
        fl->fl_end = OFFSET_MAX;
 }
 
-static int assign_type(struct file_lock *fl, long type)
+static int assign_type(struct file_lock *fl, int type)
 {
        switch (type) {
        case F_RDLCK:
@@ -549,7 +549,7 @@ static const struct lock_manager_operations lease_manager_ops = {
 /*
  * Initialize a lease, use the default lock manager operations
  */
-static int lease_init(struct file *filp, long type, struct file_lock *fl)
+static int lease_init(struct file *filp, int type, struct file_lock *fl)
 {
        if (assign_type(fl, type) != 0)
                return -EINVAL;
@@ -567,7 +567,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
 }
 
 /* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, long type)
+static struct file_lock *lease_alloc(struct file *filp, int type)
 {
        struct file_lock *fl = locks_alloc_lock();
        int error = -ENOMEM;
@@ -868,6 +868,21 @@ static bool posix_locks_conflict(struct file_lock *caller_fl,
        return locks_conflict(caller_fl, sys_fl);
 }
 
+/* Determine if lock sys_fl blocks lock caller_fl. Used on the xx_GETLK
+ * paths, so it also checks for GETLK-specific things like F_UNLCK.
+ */
+static bool posix_test_locks_conflict(struct file_lock *caller_fl,
+                                     struct file_lock *sys_fl)
+{
+       /* F_UNLCK checks any locks on the same fd. */
+       if (caller_fl->fl_type == F_UNLCK) {
+               if (!posix_same_owner(caller_fl, sys_fl))
+                       return false;
+               return locks_overlap(caller_fl, sys_fl);
+       }
+       return posix_locks_conflict(caller_fl, sys_fl);
+}
+
 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
  * checking before calling the locks_conflict().
  */
@@ -901,7 +916,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
 retry:
        spin_lock(&ctx->flc_lock);
        list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
-               if (!posix_locks_conflict(fl, cfl))
+               if (!posix_test_locks_conflict(fl, cfl))
                        continue;
                if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
                        && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
@@ -1301,6 +1316,7 @@ retry:
  out:
        spin_unlock(&ctx->flc_lock);
        percpu_up_read(&file_rwsem);
+       trace_posix_lock_inode(inode, request, error);
        /*
         * Free any unused locks.
         */
@@ -1309,7 +1325,6 @@ retry:
        if (new_fl2)
                locks_free_lock(new_fl2);
        locks_dispose_list(&dispose);
-       trace_posix_lock_inode(inode, request, error);
 
        return error;
 }
@@ -1666,7 +1681,7 @@ int fcntl_getlease(struct file *filp)
  * conflict with the lease we're trying to set.
  */
 static int
-check_conflicting_open(struct file *filp, const long arg, int flags)
+check_conflicting_open(struct file *filp, const int arg, int flags)
 {
        struct inode *inode = file_inode(filp);
        int self_wcount = 0, self_rcount = 0;
@@ -1701,7 +1716,7 @@ check_conflicting_open(struct file *filp, const long arg, int flags)
 }
 
 static int
-generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
+generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **priv)
 {
        struct file_lock *fl, *my_fl = NULL, *lease;
        struct inode *inode = file_inode(filp);
@@ -1859,7 +1874,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
  *     The (input) flp->fl_lmops->lm_break function is required
  *     by break_lease().
  */
-int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
+int generic_setlease(struct file *filp, int arg, struct file_lock **flp,
                        void **priv)
 {
        struct inode *inode = file_inode(filp);
@@ -1906,7 +1921,7 @@ lease_notifier_chain_init(void)
 }
 
 static inline void
-setlease_notifier(long arg, struct file_lock *lease)
+setlease_notifier(int arg, struct file_lock *lease)
 {
        if (arg != F_UNLCK)
                srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
@@ -1942,7 +1957,7 @@ EXPORT_SYMBOL_GPL(lease_unregister_notifier);
  * may be NULL if the lm_setup operation doesn't require it.
  */
 int
-vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
+vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv)
 {
        if (lease)
                setlease_notifier(arg, *lease);
@@ -1953,7 +1968,7 @@ vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
 }
 EXPORT_SYMBOL_GPL(vfs_setlease);
 
-static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
 {
        struct file_lock *fl;
        struct fasync_struct *new;
@@ -1988,7 +2003,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
  *     Note that you also need to call %F_SETSIG to
  *     receive a signal when the lease is broken.
  */
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
 {
        if (arg == F_UNLCK)
                return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
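The lease-argument change from long to int is a pure type cleanup: the command is one of F_RDLCK, F_WRLCK or F_UNLCK and arrives int-sized from fcntl(), so nothing changes for userspace. For reference, a minimal (unchanged) caller:

	/* take a read lease; unaffected by the kernel-internal type change */
	static int take_read_lease(int fd)
	{
		return fcntl(fd, F_SETLEASE, F_RDLCK);
	}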
@@ -2136,7 +2151,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
  * @fl: The file_lock who's fl_pid should be translated
  * @ns: The namespace into which the pid should be translated
  *
- * Used to tranlate a fl_pid into a namespace virtual pid number
+ * Used to translate a fl_pid into a namespace virtual pid number
  */
 static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
 {
@@ -2207,7 +2222,8 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
        if (fl == NULL)
                return -ENOMEM;
        error = -EINVAL;
-       if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
+       if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
+                       && flock->l_type != F_WRLCK)
                goto out;
 
        error = flock_to_posix_lock(filp, fl, flock);
@@ -2414,7 +2430,8 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
                return -ENOMEM;
 
        error = -EINVAL;
-       if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
+       if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
+                       && flock->l_type != F_WRLCK)
                goto out;
 
        error = flock64_to_posix_lock(filp, fl, flock);
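With the validation above relaxed for F_OFD_GETLK, and posix_test_locks_conflict() treating an F_UNLCK request as "match any overlapping lock with the same owner", a process can now query the OFD locks its own open file description holds. A hedged userspace sketch, assuming a kernel with this change:

	#define _GNU_SOURCE		/* for F_OFD_GETLK */
	#include <fcntl.h>
	#include <stdio.h>

	static void report_own_ofd_lock(int fd)
	{
		struct flock fl = {
			.l_type   = F_UNLCK,	/* newly allowed query type */
			.l_whence = SEEK_SET,
			.l_start  = 0,
			.l_len    = 0,		/* 0 = to end of file */
			.l_pid    = 0,		/* must be 0 for OFD commands */
		};

		if (fcntl(fd, F_OFD_GETLK, &fl) == -1)
			perror("F_OFD_GETLK");
		else if (fl.l_type != F_UNLCK)	/* fl now describes our lock */
			printf("own lock: type=%d start=%lld len=%lld\n",
			       fl.l_type, (long long)fl.l_start,
			       (long long)fl.l_len);
	}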
index 870207b..25c08fb 100644 (file)
@@ -251,7 +251,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode)
        }
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
        inode->i_ino = j;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_blocks = 0;
        memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u));
        insert_inode_hash(inode);
index bf9858f..20f23e6 100644 (file)
@@ -281,7 +281,7 @@ got_it:
                de->inode = inode->i_ino;
        }
        dir_commit_chunk(page, pos, sbi->s_dirsize);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        err = minix_handle_dirsync(dir);
 out_put:
@@ -313,7 +313,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
        else
                de->inode = 0;
        dir_commit_chunk(page, pos, len);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        return minix_handle_dirsync(inode);
 }
@@ -436,7 +436,7 @@ int minix_set_link(struct minix_dir_entry *de, struct page *page,
        else
                de->inode = inode->i_ino;
        dir_commit_chunk(page, pos, sbi->s_dirsize);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        return minix_handle_dirsync(dir);
 }
index e9fbb53..df57547 100644 (file)
@@ -501,10 +501,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
        i_gid_write(inode, raw_inode->i_gid);
        set_nlink(inode, raw_inode->i_nlinks);
        inode->i_size = raw_inode->i_size;
-       inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
-       inode->i_mtime.tv_nsec = 0;
-       inode->i_atime.tv_nsec = 0;
-       inode->i_ctime.tv_nsec = 0;
+       inode->i_mtime = inode->i_atime = inode_set_ctime(inode, raw_inode->i_time, 0);
        inode->i_blocks = 0;
        for (i = 0; i < 9; i++)
                minix_inode->u.i1_data[i] = raw_inode->i_zone[i];
@@ -543,10 +540,9 @@ static struct inode *V2_minix_iget(struct inode *inode)
        inode->i_size = raw_inode->i_size;
        inode->i_mtime.tv_sec = raw_inode->i_mtime;
        inode->i_atime.tv_sec = raw_inode->i_atime;
-       inode->i_ctime.tv_sec = raw_inode->i_ctime;
+       inode_set_ctime(inode, raw_inode->i_ctime, 0);
        inode->i_mtime.tv_nsec = 0;
        inode->i_atime.tv_nsec = 0;
-       inode->i_ctime.tv_nsec = 0;
        inode->i_blocks = 0;
        for (i = 0; i < 10; i++)
                minix_inode->u.i2_data[i] = raw_inode->i_zone[i];
@@ -622,7 +618,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
        raw_inode->i_size = inode->i_size;
        raw_inode->i_mtime = inode->i_mtime.tv_sec;
        raw_inode->i_atime = inode->i_atime.tv_sec;
-       raw_inode->i_ctime = inode->i_ctime.tv_sec;
+       raw_inode->i_ctime = inode_get_ctime(inode).tv_sec;
        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
                raw_inode->i_zone[0] = old_encode_dev(inode->i_rdev);
        else for (i = 0; i < 10; i++)
@@ -660,7 +656,7 @@ int minix_getattr(struct mnt_idmap *idmap, const struct path *path,
        struct super_block *sb = path->dentry->d_sb;
        struct inode *inode = d_inode(path->dentry);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        if (INODE_VERSION(inode) == MINIX_V1)
                stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
        else
index 4461487..ce18ae3 100644 (file)
@@ -131,7 +131,7 @@ static inline int splice_branch(struct inode *inode,
 
        /* We are done with atomic stuff, now do the rest of housekeeping */
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
 
        /* had we spliced it onto indirect block? */
        if (where->bh)
@@ -350,7 +350,7 @@ do_indirects:
                }
                first_whole++;
        }
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 }
 
index 956d518..114084d 100644 (file)
@@ -98,7 +98,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 {
        struct inode *inode = d_inode(old_dentry);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_link_count(inode);
        ihold(inode);
        return add_nondir(dentry, inode);
@@ -154,7 +154,7 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry)
 
        if (err)
                return err;
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
        inode_dec_link_count(inode);
        return 0;
 }
@@ -218,7 +218,7 @@ static int minix_rename(struct mnt_idmap *idmap,
                put_page(new_page);
                if (err)
                        goto out_dir;
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                if (dir_de)
                        drop_nlink(new_inode);
                inode_dec_link_count(new_inode);
index e56ff39..567ee54 100644 (file)
@@ -643,6 +643,8 @@ static bool nd_alloc_stack(struct nameidata *nd)
 
 /**
  * path_connected - Verify that a dentry is below mnt.mnt_root
+ * @mnt: The mountpoint to check.
+ * @dentry: The dentry to check.
  *
  * Rename can sometimes move a file or directory outside of a bind
  * mount, path_connected allows those cases to be detected.
@@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls);
 /**
  * may_follow_link - Check symlink following for unsafe situations
  * @nd: nameidata pathwalk data
+ * @inode: Used for idmapping.
  *
  * In the case of the sysctl_protected_symlinks sysctl being enabled,
  * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
@@ -2890,7 +2893,7 @@ int path_pts(struct path *path)
        dput(path->dentry);
        path->dentry = parent;
        child = d_hash_and_lookup(parent, &this);
-       if (!child)
+       if (IS_ERR_OR_NULL(child))
                return -ENOENT;
 
        path->dentry = child;
index c1eda73..6bed139 100644 (file)
@@ -59,7 +59,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
        res->change_attr = delegation->change_attr;
        if (nfs_have_writebacks(inode))
                res->change_attr++;
-       res->ctime = inode->i_ctime;
+       res->ctime = inode_get_ctime(inode);
        res->mtime = inode->i_mtime;
        res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
                args->bitmap[0];
index 9a18c5a..aaffaaa 100644 (file)
@@ -472,20 +472,26 @@ out:
        return result;
 }
 
-static void
-nfs_direct_join_group(struct list_head *list, struct inode *inode)
+static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
 {
-       struct nfs_page *req, *next;
+       struct nfs_page *req, *subreq;
 
        list_for_each_entry(req, list, wb_list) {
-               if (req->wb_head != req || req->wb_this_page == req)
+               if (req->wb_head != req)
                        continue;
-               for (next = req->wb_this_page;
-                               next != req->wb_head;
-                               next = next->wb_this_page) {
-                       nfs_list_remove_request(next);
-                       nfs_release_request(next);
-               }
+               subreq = req->wb_this_page;
+               if (subreq == req)
+                       continue;
+               do {
+                       /*
+                        * Remove subrequests from this list before freeing
+                        * them in the call to nfs_join_page_group().
+                        */
+                       if (!list_empty(&subreq->wb_list)) {
+                               nfs_list_remove_request(subreq);
+                               nfs_release_request(subreq);
+                       }
+               } while ((subreq = subreq->wb_this_page) != req);
                nfs_join_page_group(req, inode);
        }
 }
index e1706e7..2dc6445 100644 (file)
@@ -116,8 +116,8 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *
        memset(auxdata, 0, sizeof(*auxdata));
        auxdata->mtime_sec  = inode->i_mtime.tv_sec;
        auxdata->mtime_nsec = inode->i_mtime.tv_nsec;
-       auxdata->ctime_sec  = inode->i_ctime.tv_sec;
-       auxdata->ctime_nsec = inode->i_ctime.tv_nsec;
+       auxdata->ctime_sec  = inode_get_ctime(inode).tv_sec;
+       auxdata->ctime_nsec = inode_get_ctime(inode).tv_nsec;
 
        if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4)
                auxdata->change_attr = inode_peek_iversion_raw(inode);
index 8172dd4..e21c073 100644 (file)
@@ -514,7 +514,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 
                memset(&inode->i_atime, 0, sizeof(inode->i_atime));
                memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
-               memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
+               inode_set_ctime(inode, 0, 0);
                inode_set_iversion_raw(inode, 0);
                inode->i_size = 0;
                clear_nlink(inode);
@@ -535,7 +535,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
                else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
                if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-                       inode->i_ctime = fattr->ctime;
+                       inode_set_ctime_to_ts(inode, fattr->ctime);
                else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
                if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -731,7 +731,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
                if ((attr->ia_valid & ATTR_GID) != 0)
                        inode->i_gid = attr->ia_gid;
                if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-                       inode->i_ctime = fattr->ctime;
+                       inode_set_ctime_to_ts(inode, fattr->ctime);
                else
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
                                        | NFS_INO_INVALID_CTIME);
@@ -749,7 +749,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
 
                if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-                       inode->i_ctime = fattr->ctime;
+                       inode_set_ctime_to_ts(inode, fattr->ctime);
                else
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
                                        | NFS_INO_INVALID_CTIME);
@@ -765,7 +765,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
 
                if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-                       inode->i_ctime = fattr->ctime;
+                       inode_set_ctime_to_ts(inode, fattr->ctime);
                else
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
                                        | NFS_INO_INVALID_CTIME);
@@ -912,7 +912,7 @@ out_no_revalidate:
        /* Only return attributes that were revalidated. */
        stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
        stat->change_cookie = inode_peek_iversion_raw(inode);
        stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
@@ -1444,11 +1444,11 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
        }
        /* If we have atomic WCC data, we may update some attributes */
-       ts = inode->i_ctime;
+       ts = inode_get_ctime(inode);
        if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
                        && (fattr->valid & NFS_ATTR_FATTR_CTIME)
                        && timespec64_equal(&ts, &fattr->pre_ctime)) {
-               inode->i_ctime = fattr->ctime;
+               inode_set_ctime_to_ts(inode, fattr->ctime);
        }
 
        ts = inode->i_mtime;
@@ -1510,7 +1510,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
                if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
                        invalid |= NFS_INO_INVALID_MTIME;
 
-               ts = inode->i_ctime;
+               ts = inode_get_ctime(inode);
                if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
                        invalid |= NFS_INO_INVALID_CTIME;
 
@@ -1997,7 +1997,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
        }
        if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
                        (fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
-               fattr->pre_ctime = inode->i_ctime;
+               fattr->pre_ctime = inode_get_ctime(inode);
                fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
        }
        if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
@@ -2190,7 +2190,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
                        save_cache_validity & NFS_INO_INVALID_MTIME;
 
        if (fattr->valid & NFS_ATTR_FATTR_CTIME)
-               inode->i_ctime = fattr->ctime;
+               inode_set_ctime_to_ts(inode, fattr->ctime);
        else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
                nfsi->cache_validity |=
                        save_cache_validity & NFS_INO_INVALID_CTIME;
index 19d51eb..e7494cd 100644 (file)
@@ -215,7 +215,8 @@ nfs_namespace_getattr(struct mnt_idmap *idmap,
        if (NFS_FH(d_inode(path->dentry))->size != 0)
                return nfs_getattr(idmap, path, stat, request_mask,
                                   query_flags);
-       generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+                        stat);
        return 0;
 }
 
index 63802d1..49f78e2 100644 (file)
@@ -1377,7 +1377,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
        for (i = 0; i < np; i++) {
                pages[i] = alloc_page(GFP_KERNEL);
                if (!pages[i]) {
-                       np = i + 1;
                        err = -ENOMEM;
                        goto out;
                }
@@ -1401,8 +1400,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
        } while (exception.retry);
 
 out:
-       while (--np >= 0)
-               __free_page(pages[np]);
+       while (--i >= 0)
+               __free_page(pages[i]);
        kfree(pages);
 
        return err;
index 4c9f8bd..47c5c1f 100644 (file)
@@ -328,7 +328,7 @@ extern int update_open_stateid(struct nfs4_state *state,
                                const nfs4_stateid *open_stateid,
                                const nfs4_stateid *deleg_stateid,
                                fmode_t fmode);
-extern int nfs4_proc_setlease(struct file *file, long arg,
+extern int nfs4_proc_setlease(struct file *file, int arg,
                              struct file_lock **lease, void **priv);
 extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
                struct nfs_fsinfo *fsinfo);
index 4aeadd6..02788c3 100644 (file)
@@ -438,7 +438,7 @@ void nfs42_ssc_unregister_ops(void)
 }
 #endif /* CONFIG_NFS_V4_2 */
 
-static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease,
                         void **priv)
 {
        return nfs4_proc_setlease(file, arg, lease, priv);
index e1a886b..d57aaf0 100644 (file)
@@ -6004,9 +6004,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf,
 out_ok:
        ret = res.acl_len;
 out_free:
-       for (i = 0; i < npages; i++)
-               if (pages[i])
-                       __free_page(pages[i]);
+       while (--i >= 0)
+               __free_page(pages[i]);
        if (res.acl_scratch)
                __free_page(res.acl_scratch);
        kfree(pages);
@@ -7181,8 +7180,15 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
                } else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
                        goto out_restart;
                break;
-       case -NFS4ERR_BAD_STATEID:
        case -NFS4ERR_OLD_STATEID:
+               if (data->arg.new_lock_owner != 0 &&
+                       nfs4_refresh_open_old_stateid(&data->arg.open_stateid,
+                                       lsp->ls_state))
+                       goto out_restart;
+               if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp))
+                       goto out_restart;
+               fallthrough;
+       case -NFS4ERR_BAD_STATEID:
        case -NFS4ERR_STALE_STATEID:
        case -NFS4ERR_EXPIRED:
                if (data->arg.new_lock_owner != 0) {
@@ -7573,7 +7579,7 @@ static int nfs4_delete_lease(struct file *file, void **priv)
        return generic_setlease(file, F_UNLCK, NULL, priv);
 }
 
-static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
                          void **priv)
 {
        struct inode *inode = file_inode(file);
@@ -7591,7 +7597,7 @@ static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
        return -EAGAIN;
 }
 
-int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease,
                       void **priv)
 {
        switch (arg) {
index acda8f0..bf378ec 100644 (file)
@@ -345,8 +345,10 @@ void nfs_sysfs_move_sb_to_server(struct nfs_server *server)
        int ret = -ENOMEM;
 
        s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id);
-       if (s)
+       if (s) {
                ret = kobject_rename(&server->kobj, s);
+               kfree(s);
+       }
        if (ret < 0)
                pr_warn("NFS: rename sysfs %s failed (%d)\n",
                                        server->kobj.name, ret);
index 6e61fa3..daf305d 100644 (file)
@@ -1354,9 +1354,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
        trace_nfsd_stid_revoke(&dp->dl_stid);
 
        if (clp->cl_minorversion) {
+               spin_lock(&clp->cl_lock);
                dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
                refcount_inc(&dp->dl_stid.sc_count);
-               spin_lock(&clp->cl_lock);
                list_add(&dp->dl_recall_lru, &clp->cl_revoked);
                spin_unlock(&clp->cl_lock);
        }
@@ -6341,8 +6341,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
                CLOSE_STATEID(stateid))
                return status;
-       if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid))
-               return status;
        spin_lock(&cl->cl_lock);
        s = find_stateid_locked(cl, stateid);
        if (!s)
index 1b8b1aa..3709830 100644 (file)
@@ -1105,6 +1105,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
                        if (!nn->nfsd_serv)
                                return -EBUSY;
                        trace_nfsd_end_grace(netns(file));
+                       nfsd4_end_grace(nn);
                        break;
                default:
                        return -EINVAL;
@@ -1131,7 +1132,7 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
        /* Following advice from simple_fill_super documentation: */
        inode->i_ino = iunique(sb, NFSD_MaxReserved);
        inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        switch (mode & S_IFMT) {
        case S_IFDIR:
                inode->i_fop = &simple_dir_operations;
index 8a2321d..9b7acba 100644 (file)
@@ -520,7 +520,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
        nfsd_sanitize_attrs(inode, iap);
 
-       if (check_guard && guardtime != inode->i_ctime.tv_sec)
+       if (check_guard && guardtime != inode_get_ctime(inode).tv_sec)
                return nfserr_notsync;
 
        /*
@@ -956,10 +956,13 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
        last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
        for (page += offset / PAGE_SIZE; page <= last_page; page++) {
                /*
-                * Skip page replacement when extending the contents
-                * of the current page.
+                * Skip page replacement when extending the contents of the
+                * current page.  But note that we may get two zero_pages in a
+                * row from shmem.
                 */
-               if (page == *(rqstp->rq_next_page - 1))
+               if (page == *(rqstp->rq_next_page - 1) &&
+                   offset_in_page(rqstp->rq_res.page_base +
+                                  rqstp->rq_res.page_len))
                        continue;
                if (unlikely(!svc_rqst_replace_page(rqstp, page)))
                        return -EIO;
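
The extra condition repairs a subtle flaw in the old "same page as last time" test: shmem can hand back ZERO_PAGE for two consecutive ranges, so pointer equality alone cannot distinguish "still filling the tail of the current page" from "a new page that happens to be the same zero page". The added test keys off the reply's running length:

    /* page_base + page_len is the running end of the reply payload; a
     * page-aligned value means the next data starts a fresh page, so
     * replacement may only be skipped when this is non-zero */
    offset_in_page(rqstp->rq_res.page_base + rqstp->rq_res.page_len)
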
index decd647..bce734b 100644 (file)
@@ -429,7 +429,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
        nilfs_set_de_type(de, inode);
        nilfs_commit_chunk(page, mapping, from, to);
        nilfs_put_page(page);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 }
 
 /*
@@ -519,7 +519,7 @@ got_it:
        de->inode = cpu_to_le64(inode->i_ino);
        nilfs_set_de_type(de, inode);
        nilfs_commit_chunk(page, page->mapping, from, to);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        nilfs_mark_inode_dirty(dir);
        /* OFFSET_CACHE */
 out_put:
@@ -567,7 +567,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
                pde->rec_len = nilfs_rec_len_to_disk(to - from);
        dir->inode = 0;
        nilfs_commit_chunk(page, mapping, from, to);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
 out:
        nilfs_put_page(page);
        return err;
index a8ce522..d588c71 100644 (file)
@@ -366,7 +366,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
        atomic64_inc(&root->inodes_count);
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
        inode->i_ino = ino;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
        if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
                err = nilfs_bmap_read(ii->i_bmap, NULL);
@@ -450,10 +450,10 @@ int nilfs_read_inode_common(struct inode *inode,
        set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
        inode->i_size = le64_to_cpu(raw_inode->i_size);
        inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
-       inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
+       inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
+                       le32_to_cpu(raw_inode->i_ctime_nsec));
        inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
        inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
-       inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
        inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
        if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
                return -EIO; /* this inode is for metadata and corrupted */
@@ -768,9 +768,9 @@ void nilfs_write_inode_common(struct inode *inode,
        raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
        raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
        raw_inode->i_size = cpu_to_le64(inode->i_size);
-       raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+       raw_inode->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
        raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
-       raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
        raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
 
@@ -875,7 +875,7 @@ void nilfs_truncate(struct inode *inode)
 
        nilfs_truncate_bmap(ii, blkoff);
 
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        if (IS_SYNC(inode))
                nilfs_set_transaction_flag(NILFS_TI_SYNC);
 
@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
 
 int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
 {
+       struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
        struct buffer_head *ibh;
        int err;
 
+       /*
+        * Do not dirty inodes after the log writer has been detached
+        * and its nilfs_root struct has been freed.
+        */
+       if (unlikely(nilfs_purging(nilfs)))
+               return 0;
+
        err = nilfs_load_inode_block(inode, &ibh);
        if (unlikely(err)) {
                nilfs_warn(inode->i_sb,
index 1dfbc0c..40ffade 100644 (file)
@@ -149,7 +149,7 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap,
        NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE);
 
        nilfs_set_inode_flags(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        if (IS_SYNC(inode))
                nilfs_set_transaction_flag(NILFS_TI_SYNC);
 
index c7024da..2a4e7f4 100644 (file)
@@ -185,7 +185,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
        if (err)
                return err;
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_link_count(inode);
        ihold(inode);
 
@@ -283,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
        if (err)
                goto out;
 
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
        drop_nlink(inode);
        err = 0;
 out:
@@ -387,7 +387,7 @@ static int nilfs_rename(struct mnt_idmap *idmap,
                        goto out_dir;
                nilfs_set_link(new_dir, new_de, new_page, old_inode);
                nilfs_mark_inode_dirty(new_dir);
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                if (dir_de)
                        drop_nlink(new_inode);
                drop_nlink(new_inode);
@@ -406,7 +406,7 @@ static int nilfs_rename(struct mnt_idmap *idmap,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
 
        nilfs_delete_entry(old_de, old_page);
 
index c255302..7ec1687 100644 (file)
@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
                struct folio *folio = fbatch.folios[i];
 
                folio_lock(folio);
+               if (unlikely(folio->mapping != mapping)) {
+                       /* Exclude folios removed from the address space */
+                       folio_unlock(folio);
+                       continue;
+               }
                head = folio_buffers(folio);
                if (!head) {
                        create_empty_buffers(&folio->page, i_blocksize(inode), 0);
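
The added check is the standard truncation guard for folio batches: between filling the batch and locking an individual folio, the folio may have been removed from the file's address space, so folio->mapping must be re-read under the folio lock before the folio is treated as belonging to this inode. The general shape of the pattern:

    folio_lock(folio);
    if (unlikely(folio->mapping != mapping)) {
        /* lost a race with truncate/invalidate: not ours any more */
        folio_unlock(folio);
        continue;
    }
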
@@ -2845,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
                nilfs_segctor_destroy(nilfs->ns_writer);
                nilfs->ns_writer = NULL;
        }
+       set_nilfs_purging(nilfs);
 
        /* Force to free the list of dirty files */
        spin_lock(&nilfs->ns_inode_lock);
@@ -2857,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
        up_write(&nilfs->ns_segctor_sem);
 
        nilfs_dispose_list(nilfs, &garbage_list, 1);
+       clear_nilfs_purging(nilfs);
 }
index 0ef8c71..a5d1fa4 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/writeback.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
+#include <linux/fs_context.h>
 #include "nilfs.h"
 #include "export.h"
 #include "mdt.h"
@@ -1216,7 +1217,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
 }
 
 struct nilfs_super_data {
-       struct block_device *bdev;
        __u64 cno;
        int flags;
 };
@@ -1283,64 +1283,49 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd)
 
 static int nilfs_set_bdev_super(struct super_block *s, void *data)
 {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
+       s->s_dev = *(dev_t *)data;
        return 0;
 }
 
 static int nilfs_test_bdev_super(struct super_block *s, void *data)
 {
-       return (void *)s->s_bdev == data;
+       return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 static struct dentry *
 nilfs_mount(struct file_system_type *fs_type, int flags,
             const char *dev_name, void *data)
 {
-       struct nilfs_super_data sd;
+       struct nilfs_super_data sd = { .flags = flags };
        struct super_block *s;
-       struct dentry *root_dentry;
-       int err, s_new = false;
+       dev_t dev;
+       int err;
 
-       sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
-                                    NULL);
-       if (IS_ERR(sd.bdev))
-               return ERR_CAST(sd.bdev);
+       if (nilfs_identify(data, &sd))
+               return ERR_PTR(-EINVAL);
 
-       sd.cno = 0;
-       sd.flags = flags;
-       if (nilfs_identify((char *)data, &sd)) {
-               err = -EINVAL;
-               goto failed;
-       }
+       err = lookup_bdev(dev_name, &dev);
+       if (err)
+               return ERR_PTR(err);
 
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
-       if (sd.bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
-               err = -EBUSY;
-               goto failed;
-       }
        s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
-                sd.bdev);
-       mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
-       if (IS_ERR(s)) {
-               err = PTR_ERR(s);
-               goto failed;
-       }
+                &dev);
+       if (IS_ERR(s))
+               return ERR_CAST(s);
 
        if (!s->s_root) {
-               s_new = true;
-
-               /* New superblock instance created */
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
-               sb_set_blocksize(s, block_size(sd.bdev));
-
-               err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+               /*
+                * We drop s_umount here because we need to open the bdev and
+                * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+                * __invalidate_device()). It is safe because we have active sb
+                * reference and SB_BORN is not set yet.
+                */
+               up_write(&s->s_umount);
+               err = setup_bdev_super(s, flags, NULL);
+               down_write(&s->s_umount);
+               if (!err)
+                       err = nilfs_fill_super(s, data,
+                                              flags & SB_SILENT ? 1 : 0);
                if (err)
                        goto failed_super;
 
@@ -1366,24 +1351,18 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
        }
 
        if (sd.cno) {
+               struct dentry *root_dentry;
+
                err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
                if (err)
                        goto failed_super;
-       } else {
-               root_dentry = dget(s->s_root);
+               return root_dentry;
        }
 
-       if (!s_new)
-               blkdev_put(sd.bdev, fs_type);
-
-       return root_dentry;
+       return dget(s->s_root);
 
  failed_super:
        deactivate_locked_super(s);
-
- failed:
-       if (!s_new)
-               blkdev_put(sd.bdev, fs_type);
        return ERR_PTR(err);
 }
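
The nilfs_mount() rewrite follows the 6.6 block-device-opening rework: instead of opening the device with blkdev_get_by_path() before searching for a superblock (and juggling bd_fsfreeze_mutex plus an s_new flag to decide who must put the bdev), the path is resolved to a bare dev_t with lookup_bdev(), sget() is keyed on that dev_t, and setup_bdev_super() opens the device only when a brand-new superblock instance is being set up. The SB_I_RETIRED test keeps a dying superblock from being matched by a concurrent mount. The core of the new lookup, with hypothetical test/set callbacks standing in for the nilfs ones:

    dev_t dev;
    int err = lookup_bdev(dev_name, &dev);  /* path -> dev_t, no open */
    if (err)
        return ERR_PTR(err);

    s = sget(fs_type, test_bdev_super_fn, set_bdev_super_fn, flags, &dev);
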
 
index 47c7dfb..cd4ae1b 100644 (file)
@@ -29,6 +29,7 @@ enum {
        THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
        THE_NILFS_GC_RUNNING,   /* gc process is running */
        THE_NILFS_SB_DIRTY,     /* super block is dirty */
+       THE_NILFS_PURGING,      /* disposing dirty files for cleanup */
 };
 
 /**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
 THE_NILFS_FNS(DISCONTINUED, discontinued)
 THE_NILFS_FNS(GC_RUNNING, gc_running)
 THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
 
 /*
  * Mount option operations
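
THE_NILFS_FNS(PURGING, purging) generates the helpers used by the log-writer fix above: nilfs_detach_log_writer() brackets the disposal of dirty files with set_nilfs_purging()/clear_nilfs_purging(), and __nilfs_mark_inode_dirty() bails out inside that window so nothing is dirtied after the nilfs_root has been freed. Assuming the macro follows the usual kernel bit-flag pattern on ns_flags, the expansion amounts to:

    static inline void set_nilfs_purging(struct the_nilfs *nilfs)
    {
        set_bit(THE_NILFS_PURGING, &nilfs->ns_flags);
    }

    static inline void clear_nilfs_purging(struct the_nilfs *nilfs)
    {
        clear_bit(THE_NILFS_PURGING, &nilfs->ns_flags);
    }

    static inline int nilfs_purging(struct the_nilfs *nilfs)
    {
        return test_bit(THE_NILFS_PURGING, &nilfs->ns_flags);
    }
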
index 52ccd34..a026dbd 100644 (file)
@@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls)
        return -EINVAL;
 }
 
-static struct nls_table *find_nls(char *charset)
+static struct nls_table *find_nls(const char *charset)
 {
        struct nls_table *nls;
        spin_lock(&nls_lock);
@@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset)
        return nls;
 }
 
-struct nls_table *load_nls(char *charset)
+struct nls_table *load_nls(const char *charset)
 {
        return try_then_request_module(find_nls(charset), "nls_%s", charset);
 }
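
Constifying the charset argument lets callers hand const strings (parsed mount options, literals) to load_nls() without a cast; try_then_request_module() still autoloads nls_<charset>.ko on first use. Typical caller shape:

    struct nls_table *nls = load_nls("utf8");  /* may request_module("nls_utf8") */
    if (!nls)
        return -EINVAL;
    /* ... use nls ... */
    unload_nls(nls);
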
index 190aa71..ebdcc25 100644 (file)
@@ -199,7 +199,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 }
 
 /* this conversion is done only at watch creation */
-static __u32 convert_arg(unsigned long arg)
+static __u32 convert_arg(unsigned int arg)
 {
        __u32 new_mask = FS_EVENT_ON_CHILD;
 
@@ -258,7 +258,7 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
  * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
  * attached to the fsnotify_mark.
  */
-int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
 {
        struct dnotify_mark *new_dn_mark, *dn_mark;
        struct fsnotify_mark *new_fsn_mark, *fsn_mark;
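
The long -> unsigned int narrowing is lossless here: the value is fcntl(2)'s third argument for F_NOTIFY, and every DN_* flag (plus DN_MULTISHOT) fits in 32 bits; this hunk is part of a series converting the fcntl helpers to unsigned int. From userspace, the argument being decoded looks like:

    #define _GNU_SOURCE
    #include <fcntl.h>

    /* watch a directory fd; events are delivered via SIGIO */
    static int watch_dir(int dirfd)
    {
        return fcntl(dirfd, F_NOTIFY,
                     DN_CREATE | DN_RENAME | DN_MULTISHOT);
    }
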
index f602a96..647a224 100644 (file)
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -84,7 +84,7 @@ slow:
                return -ENOMEM;
        }
        inode->i_ino = ns->inum;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_flags |= S_IMMUTABLE;
        inode->i_mode = S_IFREG | S_IRUGO;
        inode->i_fop = &ns_file_operations;
index 518c3a2..4596c90 100644 (file)
@@ -1525,10 +1525,11 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
 
 #endif /* NTFS_RW */
 
+WRAP_DIR_ITER(ntfs_readdir) // FIXME!
 const struct file_operations ntfs_dir_ops = {
        .llseek         = generic_file_llseek,  /* Seek inside directory. */
        .read           = generic_read_dir,     /* Return -EISDIR. */
-       .iterate        = ntfs_readdir,         /* Read directory contents. */
+       .iterate_shared = shared_ntfs_readdir,  /* Read directory contents. */
 #ifdef NTFS_RW
        .fsync          = ntfs_dir_fsync,       /* Sync a directory to disk. */
 #endif /* NTFS_RW */
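
The 6.6 cycle removed the old ->iterate() directory operation (which the VFS called under the exclusive inode lock) in favour of ->iterate_shared(). Filesystems like ntfs, ocfs2, and overlayfs that were never audited for shared iteration keep their old exclusion through WRAP_DIR_ITER(), which generates a shared_<fn>() wrapper around a generic helper; the "// FIXME!" marks them as candidates for a real conversion. Condensed, the helper upgrades the shared i_rwsem the VFS now holds to exclusive, re-checks IS_DEADDIR() after the relock, runs the legacy iterator, and downgrades again:

    /* condensed sketch of wrap_directory_iterator(); not a verbatim copy */
    up_read(&inode->i_rwsem);          /* caller held it shared */
    down_write(&inode->i_rwsem);
    ret = IS_DEADDIR(inode) ? -ENOENT : iter(file, ctx);
    downgrade_write(&inode->i_rwsem);  /* return with it shared again */
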
index 6c3f38d..99ac6ea 100644 (file)
@@ -654,7 +654,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
         * always changes, when mtime is changed. ctime can be changed on its
         * own, mtime is then not changed, e.g. when a file is renamed.
         */
-       vi->i_ctime = ntfs2utc(si->last_mft_change_time);
+       inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
        /*
         * Last access to the data within the file. Not changed during a rename
         * for example but changed whenever the file is written to.
@@ -1218,7 +1218,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
        vi->i_gid       = base_vi->i_gid;
        set_nlink(vi, base_vi->i_nlink);
        vi->i_mtime     = base_vi->i_mtime;
-       vi->i_ctime     = base_vi->i_ctime;
+       inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
        vi->i_atime     = base_vi->i_atime;
        vi->i_generation = ni->seq_no = base_ni->seq_no;
 
@@ -1484,7 +1484,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
        vi->i_gid       = base_vi->i_gid;
        set_nlink(vi, base_vi->i_nlink);
        vi->i_mtime     = base_vi->i_mtime;
-       vi->i_ctime     = base_vi->i_ctime;
+       inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
        vi->i_atime     = base_vi->i_atime;
        vi->i_generation = ni->seq_no = base_ni->seq_no;
        /* Set inode type to zero but preserve permissions. */
@@ -2804,13 +2804,14 @@ done:
         */
        if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
                struct timespec64 now = current_time(VFS_I(base_ni));
+               struct timespec64 ctime = inode_get_ctime(VFS_I(base_ni));
                int sync_it = 0;
 
                if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) ||
-                   !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now))
+                   !timespec64_equal(&ctime, &now))
                        sync_it = 1;
+               inode_set_ctime_to_ts(VFS_I(base_ni), now);
                VFS_I(base_ni)->i_mtime = now;
-               VFS_I(base_ni)->i_ctime = now;
 
                if (sync_it)
                        mark_inode_dirty_sync(VFS_I(base_ni));
@@ -2928,7 +2929,7 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
        if (ia_valid & ATTR_MTIME)
                vi->i_mtime = attr->ia_mtime;
        if (ia_valid & ATTR_CTIME)
-               vi->i_ctime = attr->ia_ctime;
+               inode_set_ctime_to_ts(vi, attr->ia_ctime);
        mark_inode_dirty(vi);
 out:
        return err;
@@ -3004,7 +3005,7 @@ int __ntfs_write_inode(struct inode *vi, int sync)
                si->last_data_change_time = nt;
                modified = true;
        }
-       nt = utc2ntfs(vi->i_ctime);
+       nt = utc2ntfs(inode_get_ctime(vi));
        if (si->last_mft_change_time != nt) {
                ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
                                "new = 0x%llx", vi->i_ino, (long long)
index 0155f10..ad1a8f7 100644 (file)
@@ -2682,8 +2682,7 @@ mft_rec_already_initialized:
                        vi->i_mode &= ~S_IWUGO;
 
                /* Set the inode times to the current time. */
-               vi->i_atime = vi->i_mtime = vi->i_ctime =
-                       current_time(vi);
+               vi->i_atime = vi->i_mtime = inode_set_ctime_current(vi);
                /*
                 * Set the file size to 0, the ntfs inode sizes are set to 0 by
                 * the call to ntfs_init_big_inode() below.
index 1d6c824..962f12c 100644 (file)
@@ -85,7 +85,7 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
 
        stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
 
        stat->result_mask |= STATX_BTIME;
        stat->btime = ni->i_crtime;
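
generic_fillattr() gained a request_mask parameter in this cycle, so ->getattr implementations forward the mask they were given and can skip work for attributes the caller never asked for. The call shape after the change, using a hypothetical filesystem's getattr:

    static int example_getattr(struct mnt_idmap *idmap, const struct path *path,
                               struct kstat *stat, u32 request_mask,
                               unsigned int query_flags)
    {
        struct inode *inode = d_inode(path->dentry);

        generic_fillattr(idmap, request_mask, inode, stat);
        if (request_mask & STATX_BTIME) {
            /* fill stat->btime only when it was requested */
        }
        return 0;
    }
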
@@ -342,7 +342,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
                err = 0;
        }
 
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        if (IS_SYNC(inode)) {
@@ -400,7 +400,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
        ni_unlock(ni);
 
        ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        if (!IS_DIRSYNC(inode)) {
                dirty = 1;
        } else {
@@ -642,7 +642,7 @@ out:
                filemap_invalidate_unlock(mapping);
 
        if (!err) {
-               inode->i_ctime = inode->i_mtime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                mark_inode_dirty(inode);
        }
 
index 16bd9fa..2b85cb1 100644 (file)
@@ -3265,6 +3265,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
        if (is_rec_inuse(ni->mi.mrec) &&
            !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
                bool modified = false;
+               struct timespec64 ctime = inode_get_ctime(inode);
 
                /* Update times in standard attribute. */
                std = ni_std(ni);
@@ -3280,7 +3281,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
                        modified = true;
                }
 
-               dup.c_time = kernel2nt(&inode->i_ctime);
+               dup.c_time = kernel2nt(&ctime);
                if (std->c_time != dup.c_time) {
                        std->c_time = dup.c_time;
                        modified = true;
index dc7e7ab..4123e12 100644 (file)
@@ -44,6 +44,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
        u64 t64;
        struct MFT_REC *rec;
        struct runs_tree *run;
+       struct timespec64 ctime;
 
        inode->i_op = NULL;
        /* Setup 'uid' and 'gid' */
@@ -169,7 +170,8 @@ next_attr:
                nt2kernel(std5->cr_time, &ni->i_crtime);
 #endif
                nt2kernel(std5->a_time, &inode->i_atime);
-               nt2kernel(std5->c_time, &inode->i_ctime);
+               ctime = inode_get_ctime(inode);
+               nt2kernel(std5->c_time, &ctime);
                nt2kernel(std5->m_time, &inode->i_mtime);
 
                ni->std_fa = std5->fa;
@@ -958,7 +960,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
 
        if (err >= 0) {
                if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) {
-                       inode->i_ctime = inode->i_mtime = current_time(inode);
+                       inode->i_mtime = inode_set_ctime_current(inode);
                        ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
                        dirty = true;
                }
@@ -1658,8 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
        d_instantiate(dentry, inode);
 
        /* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */
-       inode->i_atime = inode->i_mtime = inode->i_ctime = dir->i_mtime =
-               dir->i_ctime = ni->i_crtime;
+       inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime);
+       dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime);
 
        mark_inode_dirty(dir);
        mark_inode_dirty(inode);
@@ -1765,9 +1767,9 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
 
        if (!err) {
                drop_nlink(inode);
-               dir->i_mtime = dir->i_ctime = current_time(dir);
+               dir->i_mtime = inode_set_ctime_current(dir);
                mark_inode_dirty(dir);
-               inode->i_ctime = dir->i_ctime;
+               inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
                if (inode->i_nlink)
                        mark_inode_dirty(inode);
        } else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) {
index 70f8c85..ad430d5 100644 (file)
@@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de)
        err = ntfs_link_inode(inode, de);
 
        if (!err) {
-               dir->i_ctime = dir->i_mtime = inode->i_ctime =
-                       current_time(dir);
+               dir->i_mtime = inode_set_ctime_to_ts(inode,
+                                                    inode_set_ctime_current(dir));
                mark_inode_dirty(inode);
                mark_inode_dirty(dir);
                d_instantiate(de, inode);
@@ -324,14 +324,11 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
                /* Restore after failed rename failed too. */
                _ntfs_bad_inode(inode);
        } else if (!err) {
-               inode->i_ctime = dir->i_ctime = dir->i_mtime =
-                       current_time(dir);
+               simple_rename_timestamp(dir, dentry, new_dir, new_dentry);
                mark_inode_dirty(inode);
                mark_inode_dirty(dir);
-               if (dir != new_dir) {
-                       new_dir->i_mtime = new_dir->i_ctime = dir->i_ctime;
+               if (dir != new_dir)
                        mark_inode_dirty(new_dir);
-               }
 
                if (IS_DIRSYNC(dir))
                        ntfs_sync_inode(dir);
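
simple_rename_timestamp() is a new fs/libfs.c helper replacing the open-coded timestamp updates on rename: it stamps one consistent time on both directories (ctime and mtime), on the inode being moved, and, when the rename overwrites something, on the replaced target. Its behaviour is roughly:

    /* sketch of the helper's semantics; not a verbatim copy */
    struct timespec64 now = inode_set_ctime_current(old_dir);

    if (old_dir != new_dir)
        inode_set_ctime_to_ts(new_dir, now);
    old_dir->i_mtime = now;
    new_dir->i_mtime = now;
    inode_set_ctime_to_ts(d_inode(old_dentry), now);
    if (d_inode(new_dentry))
        inode_set_ctime_to_ts(d_inode(new_dentry), now);
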
index 1a02072..5fffdde 100644 (file)
@@ -569,9 +569,9 @@ static void init_once(void *foo)
 }
 
 /*
- * put_ntfs - Noinline to reduce binary size.
+ * Noinline to reduce binary size.
  */
-static noinline void put_ntfs(struct ntfs_sb_info *sbi)
+static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
 {
        kfree(sbi->new_rec);
        kvfree(ntfs_put_shared(sbi->upcase));
@@ -625,12 +625,6 @@ static void ntfs_put_super(struct super_block *sb)
 
        /* Mark rw ntfs as clear, if possible. */
        ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
-
-       put_mount_options(sbi->options);
-       put_ntfs(sbi);
-       sb->s_fs_info = NULL;
-
-       sync_blockdev(sb->s_bdev);
 }
 
 static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1564,15 +1558,7 @@ load_root:
 put_inode_out:
        iput(inode);
 out:
-       /*
-        * Free resources here.
-        * ntfs_fs_free will be called with fc->s_fs_info = NULL
-        */
-       put_mount_options(sbi->options);
-       put_ntfs(sbi);
-       sb->s_fs_info = NULL;
        kfree(boot2);
-
        return err;
 }
 
@@ -1659,7 +1645,7 @@ static void ntfs_fs_free(struct fs_context *fc)
        struct ntfs_sb_info *sbi = fc->s_fs_info;
 
        if (sbi)
-               put_ntfs(sbi);
+               ntfs3_free_sbi(sbi);
 
        if (opts)
                put_mount_options(opts);
@@ -1728,13 +1714,24 @@ free_opts:
        return -ENOMEM;
 }
 
+static void ntfs3_kill_sb(struct super_block *sb)
+{
+       struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+       kill_block_super(sb);
+
+       if (sbi->options)
+               put_mount_options(sbi->options);
+       ntfs3_free_sbi(sbi);
+}
+
 // clang-format off
 static struct file_system_type ntfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "ntfs3",
        .init_fs_context        = ntfs_init_fs_context,
        .parameters             = ntfs_fs_parameters,
-       .kill_sb                = kill_block_super,
+       .kill_sb                = ntfs3_kill_sb,
        .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 // clang-format on
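
Moving the sbi/options teardown out of ->put_super and the fill_super error path into ->kill_sb means it runs exactly once on every unmount and mount-failure path, and only after kill_block_super() has synced and released the block device. The general shape of the pattern, for a hypothetical filesystem "foo":

    static void foo_kill_sb(struct super_block *sb)
    {
        struct foo_sb_info *sbi = sb->s_fs_info;

        kill_block_super(sb);  /* generic teardown first: sync, drop bdev */
        kfree(sbi);            /* then the fs-private data */
    }
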
index 023f314..29fd391 100644 (file)
@@ -637,7 +637,7 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap,
        if (!err) {
                set_cached_acl(inode, type, acl);
                inode->i_mode = mode;
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                mark_inode_dirty(inode);
        }
 
@@ -924,7 +924,7 @@ set_new_fa:
                          NULL);
 
 out:
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 
        return err;
index 9fd03ea..e75137a 100644 (file)
@@ -191,10 +191,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
        }
 
        inode->i_mode = new_mode;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        di->i_mode = cpu_to_le16(inode->i_mode);
-       di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-       di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        ocfs2_update_inode_fsync_trans(handle, inode, 0);
 
        ocfs2_journal_dirty(handle, di_bh);
index 51c9392..aef58f1 100644 (file)
@@ -7436,10 +7436,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
        }
 
        inode->i_blocks = ocfs2_inode_sector_count(inode);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
 
-       di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
-       di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 
        ocfs2_update_inode_fsync_trans(handle, inode, 1);
        ocfs2_journal_dirty(handle, di_bh);
index 8dfc284..0fdba30 100644 (file)
@@ -2048,7 +2048,7 @@ out_write_size:
                }
                inode->i_blocks = ocfs2_inode_sector_count(inode);
                di->i_size = cpu_to_le64((u64)i_size_read(inode));
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
                di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
                if (handle)
index 694471f..8b123d5 100644 (file)
@@ -1658,7 +1658,7 @@ int __ocfs2_add_entry(handle_t *handle,
                                offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
 
                if (ocfs2_dirent_would_fit(de, rec_len)) {
-                       dir->i_mtime = dir->i_ctime = current_time(dir);
+                       dir->i_mtime = inode_set_ctime_current(dir);
                        retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
                        if (retval < 0) {
                                mlog_errno(retval);
@@ -2962,11 +2962,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
        ocfs2_dinode_new_extent_list(dir, di);
 
        i_size_write(dir, sb->s_blocksize);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        di->i_size = cpu_to_le64(sb->s_blocksize);
-       di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
-       di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
+       di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(dir).tv_sec);
+       di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(dir).tv_nsec);
        ocfs2_update_inode_fsync_trans(handle, dir, 1);
 
        /*
index ba26c55..8126512 100644 (file)
@@ -337,7 +337,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
        if (inode) {
                inode->i_ino = get_next_ino();
                inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                inc_nlink(inode);
 
                inode->i_fop = &simple_dir_operations;
@@ -360,7 +360,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 
        inode->i_ino = get_next_ino();
        inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 
        ip = DLMFS_I(inode);
        ip->ip_conn = DLMFS_I(parent)->ip_conn;
index c28bc98..c3e2961 100644 (file)
@@ -2162,6 +2162,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
        struct ocfs2_meta_lvb *lvb;
+       struct timespec64 ctime = inode_get_ctime(inode);
 
        lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
 
@@ -2185,7 +2186,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
        lvb->lvb_iatime_packed  =
                cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
        lvb->lvb_ictime_packed =
-               cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
+               cpu_to_be64(ocfs2_pack_timespec(&ctime));
        lvb->lvb_imtime_packed =
                cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
        lvb->lvb_iattr    = cpu_to_be32(oi->ip_attr);
@@ -2208,6 +2209,7 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
        struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
        struct ocfs2_meta_lvb *lvb;
+       struct timespec64 ctime;
 
        mlog_meta_lvb(0, lockres);
 
@@ -2238,8 +2240,9 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
                              be64_to_cpu(lvb->lvb_iatime_packed));
        ocfs2_unpack_timespec(&inode->i_mtime,
                              be64_to_cpu(lvb->lvb_imtime_packed));
-       ocfs2_unpack_timespec(&inode->i_ctime,
+       ocfs2_unpack_timespec(&ctime,
                              be64_to_cpu(lvb->lvb_ictime_packed));
+       inode_set_ctime_to_ts(inode, ctime);
        spin_unlock(&oi->ip_lock);
        return 0;
 }
index 91a1945..3b91b4c 100644 (file)
@@ -232,8 +232,10 @@ int ocfs2_should_update_atime(struct inode *inode,
                return 0;
 
        if (vfsmnt->mnt_flags & MNT_RELATIME) {
+               struct timespec64 ctime = inode_get_ctime(inode);
+
                if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
-                   (timespec64_compare(&inode->i_atime, &inode->i_ctime) <= 0))
+                   (timespec64_compare(&inode->i_atime, &ctime) <= 0))
                        return 1;
 
                return 0;
@@ -294,7 +296,7 @@ int ocfs2_set_inode_size(handle_t *handle,
 
        i_size_write(inode, new_i_size);
        inode->i_blocks = ocfs2_inode_sector_count(inode);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
 
        status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
        if (status < 0) {
@@ -415,12 +417,12 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
        }
 
        i_size_write(inode, new_i_size);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
 
        di = (struct ocfs2_dinode *) fe_bh->b_data;
        di->i_size = cpu_to_le64(new_i_size);
-       di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
-       di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        ocfs2_update_inode_fsync_trans(handle, inode, 0);
 
        ocfs2_journal_dirty(handle, fe_bh);
@@ -824,7 +826,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
        i_size_write(inode, abs_to);
        inode->i_blocks = ocfs2_inode_sector_count(inode);
        di->i_size = cpu_to_le64((u64)i_size_read(inode));
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
        di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
        di->i_mtime_nsec = di->i_ctime_nsec;
@@ -1317,7 +1319,7 @@ int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path,
                goto bail;
        }
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        /*
         * If there is inline data in the inode, the inode will normally not
         * have data blocks allocated (it may have an external xattr block).
@@ -2043,7 +2045,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
                goto out_inode_unlock;
        }
 
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
        if (ret < 0)
                mlog_errno(ret);
@@ -2793,10 +2795,11 @@ const struct file_operations ocfs2_fops = {
        .remap_file_range = ocfs2_remap_file_range,
 };
 
+WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
 const struct file_operations ocfs2_dops = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
-       .iterate        = ocfs2_readdir,
+       .iterate_shared = shared_ocfs2_readdir,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_dir_release,
        .open           = ocfs2_dir_open,
@@ -2842,7 +2845,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
 const struct file_operations ocfs2_dops_no_plocks = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
-       .iterate        = ocfs2_readdir,
+       .iterate_shared = shared_ocfs2_readdir,
        .fsync          = ocfs2_sync_file,
        .release        = ocfs2_dir_release,
        .open           = ocfs2_dir_open,
index bb116c3..e877160 100644 (file)
@@ -306,8 +306,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
        inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
        inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
        inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
-       inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
-       inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
+       inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
+                       le32_to_cpu(fe->i_ctime_nsec));
 
        if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno))
                mlog(ML_ERROR,
@@ -1314,8 +1314,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
        fe->i_mode = cpu_to_le16(inode->i_mode);
        fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
        fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-       fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-       fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
        fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
 
@@ -1352,8 +1352,8 @@ void ocfs2_refresh_inode(struct inode *inode,
        inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
        inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
        inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
-       inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
-       inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
+       inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
+                       le32_to_cpu(fe->i_ctime_nsec));
 
        spin_unlock(&OCFS2_I(inode)->ip_lock);
 }
index 25d8072..c19c730 100644 (file)
@@ -557,7 +557,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
             (unsigned long)bh,
             (unsigned long long)bh->b_blocknr);
 
-       ocfs2_error(bh->b_bdev->bd_super,
+       ocfs2_error(bh->b_assoc_map->host->i_sb,
                    "JBD2 has aborted our journal, ocfs2 cannot continue\n");
 }
 
@@ -780,14 +780,14 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
                mlog_errno(status);
                if (!is_handle_aborted(handle)) {
                        journal_t *journal = handle->h_transaction->t_journal;
-                       struct super_block *sb = bh->b_bdev->bd_super;
 
                        mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
                                        "Aborting transaction and journal.\n");
                        handle->h_err = status;
                        jbd2_journal_abort_handle(handle);
                        jbd2_journal_abort(journal, status);
-                       ocfs2_abort(sb, "Journal already aborted.\n");
+                       ocfs2_abort(bh->b_assoc_map->host->i_sb,
+                                   "Journal already aborted.\n");
                }
        }
 }
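
block_device->bd_super was on its way out in 6.6, so these ocfs2 error paths recover the super_block through the buffer itself; assuming the journalled metadata buffers here always carry an association with an inode's mapping (b_assoc_map), the chain is:

    /* buffer_head -> associated address_space -> inode -> super_block */
    struct super_block *sb = bh->b_assoc_map->host->i_sb;
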
index b1e32ec..05d6796 100644 (file)
@@ -950,9 +950,9 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
        }
 
        di = (struct ocfs2_dinode *)di_bh->b_data;
-       inode->i_ctime = current_time(inode);
-       di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-       di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       inode_set_ctime_current(inode);
+       di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        ocfs2_update_inode_fsync_trans(handle, inode, 0);
 
        ocfs2_journal_dirty(handle, di_bh);
index 17c5222..e4a684d 100644 (file)
@@ -793,10 +793,10 @@ static int ocfs2_link(struct dentry *old_dentry,
        }
 
        inc_nlink(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        ocfs2_set_links_count(fe, inode->i_nlink);
-       fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-       fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        ocfs2_journal_dirty(handle, fe_bh);
 
        err = ocfs2_add_entry(handle, dentry, inode,
@@ -995,7 +995,7 @@ static int ocfs2_unlink(struct inode *dir,
        ocfs2_set_links_count(fe, inode->i_nlink);
        ocfs2_journal_dirty(handle, fe_bh);
 
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        if (S_ISDIR(inode->i_mode))
                drop_nlink(dir);
 
@@ -1537,7 +1537,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
                                         new_dir_bh, &target_insert);
        }
 
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
        mark_inode_dirty(old_inode);
 
        status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
@@ -1546,8 +1546,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
        if (status >= 0) {
                old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
 
-               old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
-               old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
+               old_di->i_ctime = cpu_to_le64(inode_get_ctime(old_inode).tv_sec);
+               old_di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(old_inode).tv_nsec);
                ocfs2_journal_dirty(handle, old_inode_bh);
        } else
                mlog_errno(status);
@@ -1586,9 +1586,9 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 
        if (new_inode) {
                drop_nlink(new_inode);
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
        }
-       old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+       old_dir->i_mtime = inode_set_ctime_current(old_dir);
 
        if (update_dot_dot) {
                status = ocfs2_update_entry(old_inode, handle,
@@ -1610,7 +1610,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
 
        if (old_dir != new_dir) {
                /* Keep the same times on both directories.*/
-               new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
+               new_dir->i_mtime = inode_set_ctime_to_ts(new_dir,
+                                                        inode_get_ctime(old_dir));
 
                /*
                 * This will also pick up the i_nlink change from the
index 564ab48..25c8ec3 100644 (file)
@@ -3750,9 +3750,9 @@ static int ocfs2_change_ctime(struct inode *inode,
                goto out_commit;
        }
 
-       inode->i_ctime = current_time(inode);
-       di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-       di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       inode_set_ctime_current(inode);
+       di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
 
        ocfs2_journal_dirty(handle, di_bh);
 
@@ -4073,10 +4073,10 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
                 * we want mtime to appear identical to the source and
                 * update ctime.
                 */
-               t_inode->i_ctime = current_time(t_inode);
+               inode_set_ctime_current(t_inode);
 
-               di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
-               di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+               di->i_ctime = cpu_to_le64(inode_get_ctime(t_inode).tv_sec);
+               di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(t_inode).tv_nsec);
 
                t_inode->i_mtime = s_inode->i_mtime;
                di->i_mtime = s_di->i_mtime;
@@ -4456,7 +4456,7 @@ int ocfs2_reflink_update_dest(struct inode *dest,
        if (newlen > i_size_read(dest))
                i_size_write(dest, newlen);
        spin_unlock(&OCFS2_I(dest)->ip_lock);
-       dest->i_ctime = dest->i_mtime = current_time(dest);
+       dest->i_mtime = inode_set_ctime_current(dest);
 
        ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
        if (ret) {
index 4ac77ff..6510ad7 100644 (file)
@@ -3421,9 +3421,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
                        goto out;
                }
 
-               inode->i_ctime = current_time(inode);
-               di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
-               di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+               inode_set_ctime_current(inode);
+               di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+               di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
                ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
        }
 out:
index 82cf7e9..6bda275 100644 (file)
@@ -143,7 +143,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode)
        mark_buffer_dirty(bh);
        brelse(bh);
 
-       dir->i_ctime = current_time(dir);
+       inode_set_ctime_current(dir);
 
        /* mark affected inodes dirty to rebuild checksums */
        mark_inode_dirty(dir);
@@ -399,7 +399,7 @@ static int omfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
        if (err)
                goto out;
 
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
        mark_inode_dirty(old_inode);
 out:
        return err;
index c4c79e0..2f8c188 100644 (file)
@@ -51,7 +51,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
        inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
        inode->i_mapping->a_ops = &omfs_aops;
 
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        switch (mode & S_IFMT) {
        case S_IFDIR:
                inode->i_op = &omfs_dir_inops;
@@ -134,8 +134,8 @@ static int __omfs_write_inode(struct inode *inode, int wait)
        oi->i_head.h_magic = OMFS_IMAGIC;
        oi->i_size = cpu_to_be64(inode->i_size);
 
-       ctime = inode->i_ctime.tv_sec * 1000LL +
-               ((inode->i_ctime.tv_nsec + 999)/1000);
+       ctime = inode_get_ctime(inode).tv_sec * 1000LL +
+               ((inode_get_ctime(inode).tv_nsec + 999)/1000);
        oi->i_ctime = cpu_to_be64(ctime);
 
        omfs_update_checksums(oi);
@@ -232,10 +232,9 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
 
        inode->i_atime.tv_sec = ctime;
        inode->i_mtime.tv_sec = ctime;
-       inode->i_ctime.tv_sec = ctime;
+       inode_set_ctime(inode, ctime, nsecs);
        inode->i_atime.tv_nsec = nsecs;
        inode->i_mtime.tv_nsec = nsecs;
-       inode->i_ctime.tv_nsec = nsecs;
 
        inode->i_mapping->a_ops = &omfs_aops;
 
index 0c55c8e..98f6601 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -671,11 +671,20 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
        return err;
 }
 
-static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
+static int do_fchmodat(int dfd, const char __user *filename, umode_t mode,
+                      unsigned int flags)
 {
        struct path path;
        int error;
-       unsigned int lookup_flags = LOOKUP_FOLLOW;
+       unsigned int lookup_flags;
+
+       if (unlikely(flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)))
+               return -EINVAL;
+
+       lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+       if (flags & AT_EMPTY_PATH)
+               lookup_flags |= LOOKUP_EMPTY;
+
 retry:
        error = user_path_at(dfd, filename, lookup_flags, &path);
        if (!error) {
@@ -689,15 +698,21 @@ retry:
        return error;
 }
 
+SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
+               umode_t, mode, unsigned int, flags)
+{
+       return do_fchmodat(dfd, filename, mode, flags);
+}
+
 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
                umode_t, mode)
 {
-       return do_fchmodat(dfd, filename, mode);
+       return do_fchmodat(dfd, filename, mode, 0);
 }
 
 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 {
-       return do_fchmodat(AT_FDCWD, filename, mode);
+       return do_fchmodat(AT_FDCWD, filename, mode, 0);
 }
 
 /*
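
fchmodat2(2) closes a long-standing POSIX gap: fchmodat(2) never had a flags argument, so AT_SYMLINK_NOFOLLOW could not be honoured. The new syscall accepts AT_SYMLINK_NOFOLLOW and AT_EMPTY_PATH and maps them onto the lookup flags above. Until libc grows a wrapper it can be invoked directly; __NR_fchmodat2 was allocated as 452 in this cycle (verify the number for your architecture):

    #define _GNU_SOURCE
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <fcntl.h>

    #ifndef __NR_fchmodat2
    #define __NR_fchmodat2 452
    #endif

    /* do not follow symlinks during lookup (lchmod()-style semantics) */
    static int chmod_nofollow(int dfd, const char *path, mode_t mode)
    {
        return (int)syscall(__NR_fchmodat2, dfd, path, mode,
                            AT_SYMLINK_NOFOLLOW);
    }
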
@@ -1150,7 +1165,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
  * backing_file_open - open a backing file for kernel internal use
  * @path:      path of the file to open
  * @flags:     open flags
- * @path:      path of the backing file
+ * @real_path: path of the backing file
  * @cred:      credentials for open
  *
  * Open a backing file for a stackable filesystem (e.g., overlayfs).
@@ -1322,7 +1337,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
                lookup_flags |= LOOKUP_IN_ROOT;
        if (how->resolve & RESOLVE_CACHED) {
                /* Don't bother even trying for create/truncate/tmpfile open */
-               if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
+               if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
                        return -EAGAIN;
                lookup_flags |= LOOKUP_CACHED;
        }
@@ -1503,7 +1518,7 @@ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
  * "id" is the POSIX thread ID. We use the
  * files pointer for this..
  */
-int filp_close(struct file *filp, fl_owner_t id)
+static int filp_flush(struct file *filp, fl_owner_t id)
 {
        int retval = 0;
 
@@ -1520,10 +1535,18 @@ int filp_close(struct file *filp, fl_owner_t id)
                dnotify_flush(filp, id);
                locks_remove_posix(filp, id);
        }
-       fput(filp);
        return retval;
 }
 
+int filp_close(struct file *filp, fl_owner_t id)
+{
+       int retval;
+
+       retval = filp_flush(filp, id);
+       fput(filp);
+
+       return retval;
+}
 EXPORT_SYMBOL(filp_close);
 
 /*
@@ -1533,7 +1556,20 @@ EXPORT_SYMBOL(filp_close);
  */
 SYSCALL_DEFINE1(close, unsigned int, fd)
 {
-       int retval = close_fd(fd);
+       int retval;
+       struct file *file;
+
+       file = close_fd_get_file(fd);
+       if (!file)
+               return -EBADF;
+
+       retval = filp_flush(file, current->files);
+
+       /*
+        * We're returning to user space. Don't bother
+        * with any delayed fput() cases.
+        */
+       __fput_sync(file);
 
        /* can't restart close syscall because file table entry was cleared */
        if (unlikely(retval == -ERESTARTSYS ||
@@ -1546,7 +1582,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
 }
 
 /**
- * close_range() - Close all file descriptors in a given range.
+ * sys_close_range() - Close all file descriptors in a given range.
  *
  * @fd:     starting file descriptor to close
  * @max_fd: last file descriptor to close
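
The filp_close() split lets the final close(2) flush synchronously and then drop the last reference with __fput_sync() rather than queueing a delayed fput: on the way back to userspace there is no locking context that forbids the synchronous path, and doing the work now avoids bouncing it through task_work. The -ERESTARTSYS handling remains essential: a flush error must become -EINTR because the descriptor slot is already cleared, so the syscall cannot be restarted. Condensed, the new close(2) body is:

    file = close_fd_get_file(fd);  /* detach from the fd table */
    if (!file)
        return -EBADF;
    retval = filp_flush(file, current->files);
    __fput_sync(file);             /* last ref: release it now */
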
index f0b7f4d..b2457cb 100644 (file)
@@ -237,7 +237,7 @@ found:
        if (IS_ERR(inode))
                return ERR_CAST(inode);
        if (inode->i_state & I_NEW) {
-               inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
                ent_oi = OP_I(inode);
                ent_oi->type = ent_type;
                ent_oi->u = ent_data;
@@ -387,8 +387,7 @@ static int openprom_fill_super(struct super_block *s, struct fs_context *fc)
                goto out_no_root;
        }
 
-       root_inode->i_mtime = root_inode->i_atime =
-               root_inode->i_ctime = current_time(root_inode);
+       root_inode->i_mtime = root_inode->i_atime = inode_set_ctime_current(root_inode);
        root_inode->i_op = &openprom_inode_operations;
        root_inode->i_fop = &openprom_operations;
        root_inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
index 9014bbc..0859122 100644 (file)
@@ -871,7 +871,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
        ret = orangefs_inode_getattr(inode,
            request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
        if (ret == 0) {
-               generic_fillattr(&nop_mnt_idmap, inode, stat);
+               generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
                /* override block size reported to stat */
                if (!(request_mask & STATX_SIZE))
@@ -900,12 +900,13 @@ int orangefs_permission(struct mnt_idmap *idmap,
        return generic_permission(&nop_mnt_idmap, inode, mask);
 }
 
-int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
+int orangefs_update_time(struct inode *inode, int flags)
 {
        struct iattr iattr;
+
        gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
            get_khandle_from_ino(inode));
-       generic_update_time(inode, time, flags);
+       flags = generic_update_time(inode, flags);
        memset(&iattr, 0, sizeof iattr);
         if (flags & S_ATIME)
                iattr.ia_valid |= ATTR_ATIME;
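
->update_time lost its timespec64 argument in this cycle (the time stamped is always "now"), and, as the orangefs conversion above uses it, generic_update_time() returns the subset of S_* flags it actually applied; orangefs and overlayfs below are two of the converted instances. A converted implementation follows this shape (hypothetical filesystem; foo_sync_atime is illustrative):

    static int foo_update_time(struct inode *inode, int flags)
    {
        /* stamps the requested timestamps, returns the handled flags */
        flags = generic_update_time(inode, flags);

        /* propagate only what was actually updated */
        if (flags & S_ATIME)
            foo_sync_atime(inode);
        return 0;
    }
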
index 77518e2..c9dfd5c 100644 (file)
@@ -421,7 +421,7 @@ static int orangefs_rename(struct mnt_idmap *idmap,
                     ret);
 
        if (new_dentry->d_inode)
-               new_dentry->d_inode->i_ctime = current_time(new_dentry->d_inode);
+               inode_set_ctime_current(d_inode(new_dentry));
 
        op_release(new_op);
        return ret;
index ce20d34..b711654 100644 (file)
@@ -370,7 +370,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
 int orangefs_permission(struct mnt_idmap *idmap,
                        struct inode *inode, int mask);
 
-int orangefs_update_time(struct inode *, struct timespec64 *, int);
+int orangefs_update_time(struct inode *, int);
 
 /*
  * defined in xattr.c
index 46b7dcf..0a9fcfd 100644 (file)
@@ -361,11 +361,11 @@ again2:
            downcall.resp.getattr.attributes.atime;
        inode->i_mtime.tv_sec = (time64_t)new_op->
            downcall.resp.getattr.attributes.mtime;
-       inode->i_ctime.tv_sec = (time64_t)new_op->
-           downcall.resp.getattr.attributes.ctime;
+       inode_set_ctime(inode,
+                       (time64_t)new_op->downcall.resp.getattr.attributes.ctime,
+                       0);
        inode->i_atime.tv_nsec = 0;
        inode->i_mtime.tv_nsec = 0;
-       inode->i_ctime.tv_nsec = 0;
 
        /* special case: mark the root inode as sticky */
        inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
index 21245b0..eaa1e6b 100644 (file)
@@ -239,6 +239,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
 static void ovl_file_accessed(struct file *file)
 {
        struct inode *inode, *upperinode;
+       struct timespec64 ctime, uctime;
 
        if (file->f_flags & O_NOATIME)
                return;
@@ -249,10 +250,12 @@ static void ovl_file_accessed(struct file *file)
        if (!upperinode)
                return;
 
+       ctime = inode_get_ctime(inode);
+       uctime = inode_get_ctime(upperinode);
        if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
-            !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+            !timespec64_equal(&ctime, &uctime))) {
                inode->i_mtime = upperinode->i_mtime;
-               inode->i_ctime = upperinode->i_ctime;
+               inode_set_ctime_to_ts(inode, uctime);
        }
 
        touch_atime(&file->f_path);
@@ -290,10 +293,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
        if (iocb->ki_flags & IOCB_WRITE) {
                struct inode *inode = file_inode(orig_iocb->ki_filp);
 
-               /* Actually acquired in ovl_write_iter() */
-               __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
-                                     SB_FREEZE_WRITE);
-               file_end_write(iocb->ki_filp);
+               kiocb_end_write(iocb);
                ovl_copyattr(inode);
        }
 
@@ -409,10 +409,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
                if (!aio_req)
                        goto out;
 
-               file_start_write(real.file);
-               /* Pacify lockdep, same trick as done in aio_write() */
-               __sb_writers_release(file_inode(real.file)->i_sb,
-                                    SB_FREEZE_WRITE);
                aio_req->fd = real;
                real.flags = 0;
                aio_req->orig_iocb = iocb;
@@ -420,6 +416,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
                aio_req->iocb.ki_flags = ifl;
                aio_req->iocb.ki_complete = ovl_aio_rw_complete;
                refcount_set(&aio_req->ref, 2);
+               kiocb_start_write(&aio_req->iocb);
                ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
                ovl_aio_put(aio_req);
                if (ret != -EIOCBQUEUED)
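
The lines removed from ovl_write_iter() and its completion handler were the long-standing trick of taking freeze protection in the submitter and handing it to the AIO completion path by pacifying lockdep; the new kiocb_start_write()/kiocb_end_write() helpers encapsulate that. A hedged sketch of the pattern for a write whose completion runs in another context; everything except the two helpers and vfs_iocb_iter_write() is illustrative:

	static void myfs_aio_complete(struct kiocb *iocb, long ret)
	{
		kiocb_end_write(iocb);	/* drops SB_FREEZE_WRITE protection */
		/* ... complete the original request ... */
	}

	static ssize_t myfs_submit_write(struct kiocb *iocb, struct iov_iter *from)
	{
		iocb->ki_complete = myfs_aio_complete;
		kiocb_start_write(iocb);	/* pairs with kiocb_end_write() */
		return vfs_iocb_iter_write(iocb->ki_filp, iocb, from);
	}
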
index a63e574..f22e27b 100644 (file)
@@ -693,7 +693,7 @@ int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
 }
 #endif
 
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
+int ovl_update_time(struct inode *inode, int flags)
 {
        if (flags & S_ATIME) {
                struct ovl_fs *ofs = inode->i_sb->s_fs_info;
index 9402591..8bbe617 100644 (file)
@@ -665,7 +665,7 @@ static inline struct posix_acl *ovl_get_acl_path(const struct path *path,
 }
 #endif
 
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
+int ovl_update_time(struct inode *inode, int flags);
 bool ovl_is_private_xattr(struct super_block *sb, const char *name);
 
 struct ovl_inode_params {
index ee5c473..de39e06 100644 (file)
@@ -954,10 +954,11 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
        return 0;
 }
 
+WRAP_DIR_ITER(ovl_iterate) // FIXME!
 const struct file_operations ovl_dir_operations = {
        .read           = generic_read_dir,
        .open           = ovl_dir_open,
-       .iterate        = ovl_iterate,
+       .iterate_shared = shared_ovl_iterate,
        .llseek         = ovl_dir_llseek,
        .fsync          = ovl_dir_fsync,
        .release        = ovl_dir_release,
index 5b069f1..cc89774 100644 (file)
@@ -1460,7 +1460,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
                ovl_trusted_xattr_handlers;
        sb->s_fs_info = ofs;
        sb->s_flags |= SB_POSIXACL;
-       sb->s_iflags |= SB_I_SKIP_SYNC;
+       sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE;
 
        err = -ENOMEM;
        root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe);
index 7ef9e13..c210b5d 100644 (file)
@@ -1202,6 +1202,6 @@ void ovl_copyattr(struct inode *inode)
        inode->i_mode = realinode->i_mode;
        inode->i_atime = realinode->i_atime;
        inode->i_mtime = realinode->i_mtime;
-       inode->i_ctime = realinode->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
        i_size_write(inode, i_size_read(realinode));
 }
index 2d88f73..6c1a9b1 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -489,7 +489,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
                head = pipe->head;
                if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
                        unsigned int mask = pipe->ring_size - 1;
-                       struct pipe_buffer *buf = &pipe->bufs[head & mask];
+                       struct pipe_buffer *buf;
                        struct page *page = pipe->tmp_page;
                        int copied;
 
@@ -899,7 +899,7 @@ static struct inode * get_pipe_inode(void)
        inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 
        return inode;
 
@@ -1236,7 +1236,7 @@ const struct file_operations pipefifo_fops = {
  * Currently we rely on the pipe array holding a power-of-2 number
  * of pages. Returns 0 on error.
  */
-unsigned int round_pipe_size(unsigned long size)
+unsigned int round_pipe_size(unsigned int size)
 {
        if (size > (1U << 31))
                return 0;
@@ -1319,7 +1319,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
  * Allocate a new array of pipe buffers and copy the info over. Returns the
  * pipe size if successful, or return -ERROR on error.
  */
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
 {
        unsigned long user_bufs;
        unsigned int nr_slots, size;
@@ -1387,7 +1387,7 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
        return pipe;
 }
 
-long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
 {
        struct pipe_inode_info *pipe;
        long ret;
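
The pipe-size plumbing narrows from unsigned long to unsigned int to match what can actually reach it: the fcntl() argument is int-sized, and round_pipe_size() rejects anything larger than 1U << 31 regardless. From userspace, for illustration:

	#define _GNU_SOURCE
	#include <fcntl.h>

	/* Request a 1 MiB pipe ring; the kernel rounds up to a power of
	 * two and returns the actual size, or -1 with errno set. */
	static int resize_pipe(int pipefd)
	{
		return fcntl(pipefd, F_SETPIPE_SZ, 1 << 20);
	}
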
index 7fa1b73..a05fe94 100644 (file)
@@ -1027,7 +1027,7 @@ int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
                        return error;
        }
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        if (IS_I_VERSION(inode))
                inode_inc_iversion(inode);
        set_cached_acl(inode, type, acl);
index 05452c3..7576eff 100644 (file)
@@ -1902,7 +1902,7 @@ struct inode *proc_pid_make_inode(struct super_block *sb,
        ei = PROC_I(inode);
        inode->i_mode = mode;
        inode->i_ino = get_next_ino();
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_op = &proc_def_inode_operations;
 
        /*
@@ -1966,7 +1966,7 @@ int pid_getattr(struct mnt_idmap *idmap, const struct path *path,
        struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
        struct task_struct *task;
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
        stat->uid = GLOBAL_ROOT_UID;
        stat->gid = GLOBAL_ROOT_GID;
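
generic_fillattr() grows a request_mask parameter throughout this merge so the helper can see which STATX_* fields the caller asked for (and, for instance, skip querying the change cookie unless STATX_CHANGE_COOKIE was requested). A sketch of a converted ->getattr(), with the myfs naming illustrative:

	static int myfs_getattr(struct mnt_idmap *idmap, const struct path *path,
				struct kstat *stat, u32 request_mask,
				unsigned int query_flags)
	{
		generic_fillattr(&nop_mnt_idmap, request_mask,
				 d_inode(path->dentry), stat);
		return 0;
	}
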
@@ -2817,7 +2817,7 @@ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
 \
 static const struct file_operations proc_##LSM##_attr_dir_ops = { \
        .read           = generic_read_dir, \
-       .iterate        = proc_##LSM##_attr_dir_iterate, \
+       .iterate_shared = proc_##LSM##_attr_dir_iterate, \
        .llseek         = default_llseek, \
 }; \
 \
@@ -3583,7 +3583,8 @@ static int proc_tid_comm_permission(struct mnt_idmap *idmap,
 }
 
 static const struct inode_operations proc_tid_comm_inode_operations = {
-               .permission = proc_tid_comm_permission,
+               .setattr        = proc_setattr,
+               .permission     = proc_tid_comm_permission,
 };
 
 /*
@@ -3899,7 +3900,7 @@ static int proc_task_getattr(struct mnt_idmap *idmap,
 {
        struct inode *inode = d_inode(path->dentry);
        struct task_struct *p = get_proc_task(inode);
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
        if (p) {
                stat->nlink += get_nr_threads(p);
index b3140de..6276b39 100644 (file)
@@ -352,7 +352,7 @@ static int proc_fd_getattr(struct mnt_idmap *idmap,
        struct inode *inode = d_inode(path->dentry);
        int rv = 0;
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
        /* If it's a directory, put the number of open fds there */
        if (S_ISDIR(inode->i_mode)) {
index 42ae38f..775ce0b 100644 (file)
@@ -146,7 +146,7 @@ static int proc_getattr(struct mnt_idmap *idmap,
                }
        }
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        return 0;
 }
 
index 67b09a1..532dc9d 100644 (file)
@@ -660,7 +660,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 
        inode->i_private = de->data;
        inode->i_ino = de->low_ino;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        PROC_I(inode)->pde = de;
        if (is_empty_pde(de)) {
                make_empty_dir_inode(inode);
index 9cb32e1..23fc24d 100644 (file)
@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
 
 static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
+       struct file *file = iocb->ki_filp;
+       char *buf = file->private_data;
        loff_t *fpos = &iocb->ki_pos;
        size_t phdrs_offset, notes_offset, data_offset;
        size_t page_offline_frozen = 1;
@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
                case KCORE_VMEMMAP:
                case KCORE_TEXT:
                        /*
-                        * We use _copy_to_iter() to bypass usermode hardening
-                        * which would otherwise prevent this operation.
+                        * Sadly we must use a bounce buffer here to be able to
+                        * make use of copy_from_kernel_nofault(), as these
+                        * memory regions might not always be mapped on all
+                        * architectures.
                         */
-                       if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+                       if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+                               if (iov_iter_zero(tsz, iter) != tsz) {
+                                       ret = -EFAULT;
+                                       goto out;
+                               }
+                       /*
+                        * We know the bounce buffer is safe to copy from, so
+                        * use _copy_to_iter() directly.
+                        */
+                       } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
                                ret = -EFAULT;
                                goto out;
                        }
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
        if (ret)
                return ret;
 
+       filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!filp->private_data)
+               return -ENOMEM;
+
        if (kcore_need_update)
                kcore_update_ram();
        if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
        return 0;
 }
 
+static int release_kcore(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);
+       return 0;
+}
+
 static const struct proc_ops kcore_proc_ops = {
        .proc_read_iter = read_kcore_iter,
        .proc_open      = open_kcore,
+       .proc_release   = release_kcore,
        .proc_lseek     = default_llseek,
 };
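
/proc/kcore now reads through a PAGE_SIZE bounce buffer owned by each open file instead of copying straight out of possibly-unmapped kernel addresses. The allocate-in-open, free-in-release lifecycle reduces to this skeleton (names illustrative):

	static int myproc_open(struct inode *inode, struct file *filp)
	{
		filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
		return filp->private_data ? 0 : -ENOMEM;
	}

	static int myproc_release(struct inode *inode, struct file *filp)
	{
		kfree(filp->private_data);	/* kfree(NULL) is a no-op */
		return 0;
	}
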
 
index a0c0419..2ba31b6 100644 (file)
@@ -308,7 +308,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
 
        net = get_proc_task_net(inode);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
 
        if (net != NULL) {
                stat->nlink = net->proc_net->nlink;
@@ -321,6 +321,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
 const struct inode_operations proc_net_inode_operations = {
        .lookup         = proc_tgid_net_lookup,
        .getattr        = proc_tgid_net_getattr,
+       .setattr        = proc_setattr,
 };
 
 static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
index 5ea4265..bf06344 100644 (file)
@@ -463,7 +463,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
        head->count++;
        spin_unlock(&sysctl_lock);
 
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_mode = table->mode;
        if (!S_ISDIR(table->mode)) {
                inode->i_mode |= S_IFREG;
@@ -849,7 +849,7 @@ static int proc_sys_getattr(struct mnt_idmap *idmap,
        if (IS_ERR(head))
                return PTR_ERR(head);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        if (table)
                stat->mode = (stat->mode & S_IFMT) | table->mode;
 
index a86e65a..9191248 100644 (file)
@@ -314,7 +314,8 @@ static int proc_root_getattr(struct mnt_idmap *idmap,
                             const struct path *path, struct kstat *stat,
                             u32 request_mask, unsigned int query_flags)
 {
-       generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+                        stat);
        stat->nlink = proc_root.nlink + nr_processes();
        return 0;
 }
index 72cd69b..ecc4da8 100644 (file)
@@ -46,7 +46,7 @@ int proc_setup_self(struct super_block *s)
                struct inode *inode = new_inode(s);
                if (inode) {
                        inode->i_ino = self_inum;
-                       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
                        inode->i_mode = S_IFLNK | S_IRWXUGO;
                        inode->i_uid = GLOBAL_ROOT_UID;
                        inode->i_gid = GLOBAL_ROOT_GID;
index 507cd4e..fafff1b 100644 (file)
@@ -587,8 +587,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
        bool migration = false;
 
        if (pmd_present(*pmd)) {
-               /* FOLL_DUMP will return -EFAULT on huge zero page */
-               page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+               page = vm_normal_page_pmd(vma, addr, *pmd);
        } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
                swp_entry_t entry = pmd_to_swp_entry(*pmd);
 
@@ -758,12 +757,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops smaps_walk_ops = {
        .pmd_entry              = smaps_pte_range,
        .hugetlb_entry          = smaps_hugetlb_range,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 static const struct mm_walk_ops smaps_shmem_walk_ops = {
        .pmd_entry              = smaps_pte_range,
        .hugetlb_entry          = smaps_hugetlb_range,
        .pte_hole               = smaps_pte_hole,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 /*
@@ -1245,6 +1246,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 static const struct mm_walk_ops clear_refs_walk_ops = {
        .pmd_entry              = clear_refs_pte_range,
        .test_walk              = clear_refs_test_walk,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1622,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ops = {
        .pmd_entry      = pagemap_pmd_range,
        .pte_hole       = pagemap_pte_hole,
        .hugetlb_entry  = pagemap_hugetlb_range,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 /*
@@ -1935,6 +1938,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops show_numa_ops = {
        .hugetlb_entry = gather_hugetlb_stats,
        .pmd_entry = gather_pte_stats,
+       .walk_lock = PGWALK_RDLOCK,
 };
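
Every mm_walk_ops instance in this merge gains a .walk_lock field declaring the mmap_lock state the walk requires: PGWALK_RDLOCK for read-side walks like the ones above, PGWALK_WRLOCK where the walker modifies page tables (clear_refs). The walk core can then assert the expectation instead of every caller re-deriving it. A sketch, with the callback name illustrative:

	static const struct mm_walk_ops my_walk_ops = {
		.pmd_entry	= my_pmd_entry,		/* hypothetical callback */
		.walk_lock	= PGWALK_RDLOCK,	/* mmap_lock held for read */
	};
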
 
 /*
index a553273..63ac1f9 100644 (file)
@@ -46,7 +46,7 @@ int proc_setup_thread_self(struct super_block *s)
                struct inode *inode = new_inode(s);
                if (inode) {
                        inode->i_ino = thread_self_inum;
-                       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+                       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
                        inode->i_mode = S_IFLNK | S_IRWXUGO;
                        inode->i_uid = GLOBAL_ROOT_UID;
                        inode->i_gid = GLOBAL_ROOT_GID;
index cb80a77..1fb213f 100644 (file)
@@ -132,7 +132,7 @@ ssize_t read_from_oldmem(struct iov_iter *iter, size_t count,
                         u64 *ppos, bool encrypted)
 {
        unsigned long pfn, offset;
-       size_t nr_bytes;
+       ssize_t nr_bytes;
        ssize_t read = 0, tmp;
        int idx;
 
index c49d554..3acc386 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config PSTORE
        tristate "Persistent store support"
-       select CRYPTO if PSTORE_COMPRESS
        default n
        help
           This option enables generic access to platform level
@@ -22,99 +21,18 @@ config PSTORE_DEFAULT_KMSG_BYTES
          Defines default size of pstore kernel log storage.
          Can be enlarged if needed, not recommended to shrink it.
 
-config PSTORE_DEFLATE_COMPRESS
-       tristate "DEFLATE (ZLIB) compression"
-       default y
-       depends on PSTORE
-       select CRYPTO_DEFLATE
-       help
-         This option enables DEFLATE (also known as ZLIB) compression
-         algorithm support.
-
-config PSTORE_LZO_COMPRESS
-       tristate "LZO compression"
-       depends on PSTORE
-       select CRYPTO_LZO
-       help
-         This option enables LZO compression algorithm support.
-
-config PSTORE_LZ4_COMPRESS
-       tristate "LZ4 compression"
-       depends on PSTORE
-       select CRYPTO_LZ4
-       help
-         This option enables LZ4 compression algorithm support.
-
-config PSTORE_LZ4HC_COMPRESS
-       tristate "LZ4HC compression"
-       depends on PSTORE
-       select CRYPTO_LZ4HC
-       help
-         This option enables LZ4HC (high compression) mode algorithm.
-
-config PSTORE_842_COMPRESS
-       bool "842 compression"
-       depends on PSTORE
-       select CRYPTO_842
-       help
-         This option enables 842 compression algorithm support.
-
-config PSTORE_ZSTD_COMPRESS
-       bool "zstd compression"
-       depends on PSTORE
-       select CRYPTO_ZSTD
-       help
-         This option enables zstd compression algorithm support.
-
 config PSTORE_COMPRESS
-       def_bool y
+       bool "Pstore compression (deflate)"
        depends on PSTORE
-       depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS ||    \
-                  PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS ||      \
-                  PSTORE_842_COMPRESS || PSTORE_ZSTD_COMPRESS
-
-choice
-       prompt "Default pstore compression algorithm"
-       depends on PSTORE_COMPRESS
+       select ZLIB_INFLATE
+       select ZLIB_DEFLATE
+       default y
        help
-         This option chooses the default active compression algorithm.
-         This change be changed at boot with "pstore.compress=..." on
-         the kernel command line.
-
-         Currently, pstore has support for 6 compression algorithms:
-         deflate, lzo, lz4, lz4hc, 842 and zstd.
-
-         The default compression algorithm is deflate.
-
-       config PSTORE_DEFLATE_COMPRESS_DEFAULT
-               bool "deflate" if PSTORE_DEFLATE_COMPRESS
-
-       config PSTORE_LZO_COMPRESS_DEFAULT
-               bool "lzo" if PSTORE_LZO_COMPRESS
-
-       config PSTORE_LZ4_COMPRESS_DEFAULT
-               bool "lz4" if PSTORE_LZ4_COMPRESS
-
-       config PSTORE_LZ4HC_COMPRESS_DEFAULT
-               bool "lz4hc" if PSTORE_LZ4HC_COMPRESS
-
-       config PSTORE_842_COMPRESS_DEFAULT
-               bool "842" if PSTORE_842_COMPRESS
-
-       config PSTORE_ZSTD_COMPRESS_DEFAULT
-               bool "zstd" if PSTORE_ZSTD_COMPRESS
-
-endchoice
-
-config PSTORE_COMPRESS_DEFAULT
-       string
-       depends on PSTORE_COMPRESS
-       default "deflate" if PSTORE_DEFLATE_COMPRESS_DEFAULT
-       default "lzo" if PSTORE_LZO_COMPRESS_DEFAULT
-       default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT
-       default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT
-       default "842" if PSTORE_842_COMPRESS_DEFAULT
-       default "zstd" if PSTORE_ZSTD_COMPRESS_DEFAULT
+         Whether pstore records should be compressed before being written to
+         the backing store. This is implemented using the zlib 'deflate'
+         algorithm, using the library implementation instead of using the full
+         blown crypto API. This reduces the risk of secondary oopses or other
+         problems while pstore is recording panic metadata.
 
 config PSTORE_CONSOLE
        bool "Log kernel console messages"
index ffbadb8..5853607 100644 (file)
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
        if (!private)
                return;
        if (private->record) {
-               kfree(private->record->buf);
+               kvfree(private->record->buf);
                kfree(private->record->priv);
                kfree(private->record);
        }
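
record->buf may now be backed by kvmalloc-family memory (see the ramoops and vmemdup_user() conversions later in this series), so every free of it becomes kvfree(), which is safe for both kmalloc()- and vmalloc()-backed pointers. The general pairing rule, sizes and names illustrative:

	char *buf = kvzalloc(bufsize, GFP_KERNEL);	/* may fall back to vmalloc */
	if (!buf)
		return -ENOMEM;
	/* ... */
	kvfree(buf);	/* never plain kfree() for possibly-kvmalloc'd memory */
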
@@ -223,7 +223,7 @@ static struct inode *pstore_get_inode(struct super_block *sb)
        struct inode *inode = new_inode(sb);
        if (inode) {
                inode->i_ino = get_next_ino();
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        }
        return inode;
 }
@@ -390,7 +390,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
        inode->i_private = private;
 
        if (record->time.tv_sec)
-               inode->i_mtime = inode->i_ctime = record->time;
+               inode->i_mtime = inode_set_ctime_to_ts(inode, record->time);
 
        d_add(dentry, inode);
 
index cbc0b46..62356d5 100644 (file)
 #include <linux/init.h>
 #include <linux/kmsg_dump.h>
 #include <linux/console.h>
+#include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/pstore.h>
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-#include <linux/lzo.h>
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-#include <linux/lz4.h>
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-#include <linux/zstd.h>
-#endif
-#include <linux/crypto.h>
 #include <linux/string.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/jiffies.h>
+#include <linux/vmalloc.h>
 #include <linux/workqueue.h>
+#include <linux/zlib.h>
 
 #include "internal.h"
 
@@ -80,12 +73,21 @@ static char *backend;
 module_param(backend, charp, 0444);
 MODULE_PARM_DESC(backend, "specific backend to use");
 
-static char *compress =
-#ifdef CONFIG_PSTORE_COMPRESS_DEFAULT
-               CONFIG_PSTORE_COMPRESS_DEFAULT;
-#else
-               NULL;
-#endif
+/*
+ * pstore no longer implements compression via the crypto API, and only
+ * supports zlib deflate compression implemented using the zlib library
+ * interface. This removes additional complexity which is hard to justify for a
+ * diagnostic facility that has to operate in conditions where the system may
+ * have become unstable. Zlib deflate is comparatively small in terms of code
+ * size, and compresses ASCII text comparatively well. In terms of compression
+ * speed, deflate is not the best performer but for recording the log output on
+ * a kernel panic, this is not considered critical.
+ *
+ * The only remaining arguments supported by the compress= module parameter are
+ * 'deflate' and 'none'. To retain compatibility with existing installations,
+ * all other values are logged and replaced with 'deflate'.
+ */
+static char *compress = "deflate";
 module_param(compress, charp, 0444);
 MODULE_PARM_DESC(compress, "compression to use");
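
Per the comment above, "deflate" and "none" are now the only accepted values; anything else is logged and coerced to deflate. To keep records uncompressed, boot with:

	pstore.compress=none

(or pass compress=none when pstore is built as a module).
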
 
@@ -94,16 +96,9 @@ unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
 module_param(kmsg_bytes, ulong, 0444);
 MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
 
-/* Compression parameters */
-static struct crypto_comp *tfm;
-
-struct pstore_zbackend {
-       int (*zbufsize)(size_t size);
-       const char *name;
-};
+static void *compress_workspace;
 
 static char *big_oops_buf;
-static size_t big_oops_buf_sz;
 
 void pstore_set_kmsg_bytes(int bytes)
 {
@@ -168,206 +163,89 @@ static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
        }
 }
 
-#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
-static int zbufsize_deflate(size_t size)
-{
-       size_t cmpr;
-
-       switch (size) {
-       /* buffer range for efivars */
-       case 1000 ... 2000:
-               cmpr = 56;
-               break;
-       case 2001 ... 3000:
-               cmpr = 54;
-               break;
-       case 3001 ... 3999:
-               cmpr = 52;
-               break;
-       /* buffer range for nvram, erst */
-       case 4000 ... 10000:
-               cmpr = 45;
-               break;
-       default:
-               cmpr = 60;
-               break;
-       }
-
-       return (size * 100) / cmpr;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-static int zbufsize_lzo(size_t size)
-{
-       return lzo1x_worst_compress(size);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-static int zbufsize_lz4(size_t size)
-{
-       return LZ4_compressBound(size);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
-static int zbufsize_842(size_t size)
-{
-       return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-static int zbufsize_zstd(size_t size)
-{
-       return zstd_compress_bound(size);
-}
-#endif
-
-static const struct pstore_zbackend *zbackend __ro_after_init;
-
-static const struct pstore_zbackend zbackends[] = {
-#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
-       {
-               .zbufsize       = zbufsize_deflate,
-               .name           = "deflate",
-       },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-       {
-               .zbufsize       = zbufsize_lzo,
-               .name           = "lzo",
-       },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS)
-       {
-               .zbufsize       = zbufsize_lz4,
-               .name           = "lz4",
-       },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-       {
-               .zbufsize       = zbufsize_lz4,
-               .name           = "lz4hc",
-       },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
-       {
-               .zbufsize       = zbufsize_842,
-               .name           = "842",
-       },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-       {
-               .zbufsize       = zbufsize_zstd,
-               .name           = "zstd",
-       },
-#endif
-       { }
-};
-
 static int pstore_compress(const void *in, void *out,
                           unsigned int inlen, unsigned int outlen)
 {
+       struct z_stream_s zstream = {
+               .next_in        = in,
+               .avail_in       = inlen,
+               .next_out       = out,
+               .avail_out      = outlen,
+               .workspace      = compress_workspace,
+       };
        int ret;
 
        if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
                return -EINVAL;
 
-       ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
-       if (ret) {
-               pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
-               return ret;
-       }
+       ret = zlib_deflateInit2(&zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                               -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+       if (ret != Z_OK)
+               return -EINVAL;
+
+       ret = zlib_deflate(&zstream, Z_FINISH);
+       if (ret != Z_STREAM_END)
+               return -EINVAL;
+
+       ret = zlib_deflateEnd(&zstream);
+       if (ret != Z_OK)
+               pr_warn_once("zlib_deflateEnd() failed: %d\n", ret);
 
-       return outlen;
+       return zstream.total_out;
 }
 
 static void allocate_buf_for_compression(void)
 {
-       struct crypto_comp *ctx;
-       int size;
        char *buf;
 
-       /* Skip if not built-in or compression backend not selected yet. */
-       if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend)
-               return;
-
-       /* Skip if no pstore backend yet or compression init already done. */
-       if (!psinfo || tfm)
-               return;
-
-       if (!crypto_has_comp(zbackend->name, 0, 0)) {
-               pr_err("Unknown compression: %s\n", zbackend->name);
+       /* Skip if not built-in or compression disabled. */
+       if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !compress ||
+           !strcmp(compress, "none")) {
+               compress = NULL;
                return;
        }
 
-       size = zbackend->zbufsize(psinfo->bufsize);
-       if (size <= 0) {
-               pr_err("Invalid compression size for %s: %d\n",
-                      zbackend->name, size);
-               return;
+       if (strcmp(compress, "deflate")) {
+               pr_err("Unsupported compression '%s', falling back to deflate\n",
+                      compress);
+               compress = "deflate";
        }
 
-       buf = kmalloc(size, GFP_KERNEL);
+       /*
+        * The compression buffer only needs to be as large as the maximum
+        * uncompressed record size, since any record that would be expanded by
+        * compression is just stored uncompressed.
+        */
+       buf = kvzalloc(psinfo->bufsize, GFP_KERNEL);
        if (!buf) {
-               pr_err("Failed %d byte compression buffer allocation for: %s\n",
-                      size, zbackend->name);
+               pr_err("Failed %zu byte compression buffer allocation for: %s\n",
+                      psinfo->bufsize, compress);
                return;
        }
 
-       ctx = crypto_alloc_comp(zbackend->name, 0, 0);
-       if (IS_ERR_OR_NULL(ctx)) {
-               kfree(buf);
-               pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
-                      PTR_ERR(ctx));
+       compress_workspace =
+               vmalloc(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL));
+       if (!compress_workspace) {
+               pr_err("Failed to allocate zlib deflate workspace\n");
+               kvfree(buf);
                return;
        }
 
        /* A non-NULL big_oops_buf indicates compression is available. */
-       tfm = ctx;
-       big_oops_buf_sz = size;
        big_oops_buf = buf;
 
-       pr_info("Using crash dump compression: %s\n", zbackend->name);
+       pr_info("Using crash dump compression: %s\n", compress);
 }
 
 static void free_buf_for_compression(void)
 {
-       if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) {
-               crypto_free_comp(tfm);
-               tfm = NULL;
+       if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress_workspace) {
+               vfree(compress_workspace);
+               compress_workspace = NULL;
        }
-       kfree(big_oops_buf);
-       big_oops_buf = NULL;
-       big_oops_buf_sz = 0;
-}
 
-/*
- * Called when compression fails, since the printk buffer
- * would be fetched for compression calling it again when
- * compression fails would have moved the iterator of
- * printk buffer which results in fetching old contents.
- * Copy the recent messages from big_oops_buf to psinfo->buf
- */
-static size_t copy_kmsg_to_buffer(int hsize, size_t len)
-{
-       size_t total_len;
-       size_t diff;
-
-       total_len = hsize + len;
-
-       if (total_len > psinfo->bufsize) {
-               diff = total_len - psinfo->bufsize + hsize;
-               memcpy(psinfo->buf, big_oops_buf, hsize);
-               memcpy(psinfo->buf + hsize, big_oops_buf + diff,
-                                       psinfo->bufsize - hsize);
-               total_len = psinfo->bufsize;
-       } else
-               memcpy(psinfo->buf, big_oops_buf, total_len);
-
-       return total_len;
+       kvfree(big_oops_buf);
+       big_oops_buf = NULL;
 }
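
Note the windowBits signs: -MAX_WBITS on the deflate side here, and -DEF_WBITS on the inflate side later in this patch (both constants are 15), selecting raw deflate streams with no zlib header or checksum. The matching decompressor setup, as used further down in pstore_get_backend_records():

	struct z_stream_s zstream = {};

	zstream.workspace = kvmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
	zlib_inflateInit2(&zstream, -DEF_WBITS);	/* raw stream, matches -MAX_WBITS */
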
 
 void pstore_record_init(struct pstore_record *record,
@@ -426,13 +304,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
                record.part = part;
                record.buf = psinfo->buf;
 
-               if (big_oops_buf) {
-                       dst = big_oops_buf;
-                       dst_size = big_oops_buf_sz;
-               } else {
-                       dst = psinfo->buf;
-                       dst_size = psinfo->bufsize;
-               }
+               dst = big_oops_buf ?: psinfo->buf;
+               dst_size = psinfo->bufsize;
 
                /* Write dump header. */
                header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why,
@@ -453,8 +326,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
                                record.compressed = true;
                                record.size = zipped_len;
                        } else {
-                               record.size = copy_kmsg_to_buffer(header_size,
-                                                                 dump_size);
+                               record.size = header_size + dump_size;
+                               memcpy(psinfo->buf, dst, record.size);
                        }
                } else {
                        record.size = header_size + dump_size;
@@ -549,7 +422,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
        if (record->buf)
                return -EINVAL;
 
-       record->buf = memdup_user(buf, record->size);
+       record->buf = vmemdup_user(buf, record->size);
        if (IS_ERR(record->buf)) {
                ret = PTR_ERR(record->buf);
                goto out;
@@ -557,7 +430,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
 
        ret = record->psi->write(record);
 
-       kfree(record->buf);
+       kvfree(record->buf);
 out:
        record->buf = NULL;
 
@@ -681,7 +554,8 @@ void pstore_unregister(struct pstore_info *psi)
 }
 EXPORT_SYMBOL_GPL(pstore_unregister);
 
-static void decompress_record(struct pstore_record *record)
+static void decompress_record(struct pstore_record *record,
+                             struct z_stream_s *zstream)
 {
        int ret;
        int unzipped_len;
@@ -697,40 +571,50 @@ static void decompress_record(struct pstore_record *record)
        }
 
        /* Missing compression buffer means compression was not initialized. */
-       if (!big_oops_buf) {
+       if (!zstream->workspace) {
                pr_warn("no decompression method initialized!\n");
                return;
        }
 
+       ret = zlib_inflateReset(zstream);
+       if (ret != Z_OK) {
+               pr_err("zlib_inflateReset() failed, ret = %d!\n", ret);
+               return;
+       }
+
        /* Allocate enough space to hold max decompression and ECC. */
-       unzipped_len = big_oops_buf_sz;
-       workspace = kmalloc(unzipped_len + record->ecc_notice_size,
-                           GFP_KERNEL);
+       workspace = kvzalloc(psinfo->bufsize + record->ecc_notice_size,
+                            GFP_KERNEL);
        if (!workspace)
                return;
 
-       /* After decompression "unzipped_len" is almost certainly smaller. */
-       ret = crypto_comp_decompress(tfm, record->buf, record->size,
-                                         workspace, &unzipped_len);
-       if (ret) {
-               pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
-               kfree(workspace);
+       zstream->next_in        = record->buf;
+       zstream->avail_in       = record->size;
+       zstream->next_out       = workspace;
+       zstream->avail_out      = psinfo->bufsize;
+
+       ret = zlib_inflate(zstream, Z_FINISH);
+       if (ret != Z_STREAM_END) {
+               pr_err("zlib_inflate() failed, ret = %d!\n", ret);
+               kvfree(workspace);
                return;
        }
 
+       unzipped_len = zstream->total_out;
+
        /* Append ECC notice to decompressed buffer. */
        memcpy(workspace + unzipped_len, record->buf + record->size,
               record->ecc_notice_size);
 
        /* Copy decompressed contents into an minimum-sized allocation. */
-       unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
-                          GFP_KERNEL);
-       kfree(workspace);
+       unzipped = kvmemdup(workspace, unzipped_len + record->ecc_notice_size,
+                           GFP_KERNEL);
+       kvfree(workspace);
        if (!unzipped)
                return;
 
        /* Swap out compressed contents with decompressed contents. */
-       kfree(record->buf);
+       kvfree(record->buf);
        record->buf = unzipped;
        record->size = unzipped_len;
        record->compressed = false;
@@ -747,10 +631,17 @@ void pstore_get_backend_records(struct pstore_info *psi,
 {
        int failed = 0;
        unsigned int stop_loop = 65536;
+       struct z_stream_s zstream = {};
 
        if (!psi || !root)
                return;
 
+       if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) {
+               zstream.workspace = kvmalloc(zlib_inflate_workspacesize(),
+                                            GFP_KERNEL);
+               zlib_inflateInit2(&zstream, -DEF_WBITS);
+       }
+
        mutex_lock(&psi->read_mutex);
        if (psi->open && psi->open(psi))
                goto out;
@@ -779,11 +670,11 @@ void pstore_get_backend_records(struct pstore_info *psi,
                        break;
                }
 
-               decompress_record(record);
+               decompress_record(record, &zstream);
                rc = pstore_mkfile(root, record);
                if (rc) {
                        /* pstore_mkfile() did not take record, so free it. */
-                       kfree(record->buf);
+                       kvfree(record->buf);
                        kfree(record->priv);
                        kfree(record);
                        if (rc != -EEXIST || !quiet)
@@ -795,6 +686,12 @@ void pstore_get_backend_records(struct pstore_info *psi,
 out:
        mutex_unlock(&psi->read_mutex);
 
+       if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) {
+               if (zlib_inflateEnd(&zstream) != Z_OK)
+                       pr_warn("zlib_inflateEnd() failed\n");
+               kvfree(zstream.workspace);
+       }
+
        if (failed)
                pr_warn("failed to create %d record(s) from '%s'\n",
                        failed, psi->name);
@@ -818,34 +715,10 @@ static void pstore_timefunc(struct timer_list *unused)
        pstore_timer_kick();
 }
 
-static void __init pstore_choose_compression(void)
-{
-       const struct pstore_zbackend *step;
-
-       if (!compress)
-               return;
-
-       for (step = zbackends; step->name; step++) {
-               if (!strcmp(compress, step->name)) {
-                       zbackend = step;
-                       return;
-               }
-       }
-}
-
 static int __init pstore_init(void)
 {
        int ret;
 
-       pstore_choose_compression();
-
-       /*
-        * Check if any pstore backends registered earlier but did not
-        * initialize compression because crypto was not ready. If so,
-        * initialize compression now.
-        */
-       allocate_buf_for_compression();
-
        ret = pstore_init_fs();
        if (ret)
                free_buf_for_compression();
index 2f625e1..d36702c 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/compiler.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/mm.h>
 
 #include "internal.h"
 #include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
        /* ECC correction notice */
        record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
 
-       record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+       record->buf = kvzalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
        if (record->buf == NULL) {
                size = -ENOMEM;
                goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
 
 out:
        if (free_prz) {
-               kfree(prz->old_log);
+               kvfree(prz->old_log);
                kfree(prz);
        }
 
@@ -833,7 +834,7 @@ static int ramoops_probe(struct platform_device *pdev)
         */
        if (cxt->pstore.flags & PSTORE_FLAGS_DMESG) {
                cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size;
-               cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
+               cxt->pstore.buf = kvzalloc(cxt->pstore.bufsize, GFP_KERNEL);
                if (!cxt->pstore.buf) {
                        pr_err("cannot allocate pstore crash dump buffer\n");
                        err = -ENOMEM;
@@ -866,7 +867,7 @@ static int ramoops_probe(struct platform_device *pdev)
        return 0;
 
 fail_buf:
-       kfree(cxt->pstore.buf);
+       kvfree(cxt->pstore.buf);
 fail_clear:
        cxt->pstore.bufsize = 0;
 fail_init:
@@ -881,7 +882,7 @@ static void ramoops_remove(struct platform_device *pdev)
 
        pstore_unregister(&cxt->pstore);
 
-       kfree(cxt->pstore.buf);
+       kvfree(cxt->pstore.buf);
        cxt->pstore.bufsize = 0;
 
        ramoops_free_przs(cxt);
index 85aaf0f..650e437 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 #include <asm/page.h>
 
 #include "ram_internal.h"
 /**
  * struct persistent_ram_buffer - persistent circular RAM buffer
  *
- * @sig:
- *     signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
- * @start:
- *     offset into @data where the beginning of the stored bytes begin
- * @size:
- *     number of valid bytes stored in @data
+ * @sig: Signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
+ * @start: First valid byte in the buffer.
+ * @size: Number of valid bytes in the buffer.
+ * @data: The contents of the buffer.
  */
 struct persistent_ram_buffer {
        uint32_t    sig;
@@ -301,7 +300,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
 
        if (!prz->old_log) {
                persistent_ram_ecc_old(prz);
-               prz->old_log = kmalloc(size, GFP_KERNEL);
+               prz->old_log = kvzalloc(size, GFP_KERNEL);
        }
        if (!prz->old_log) {
                pr_err("failed to allocate buffer\n");
@@ -385,7 +384,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
 
 void persistent_ram_free_old(struct persistent_ram_zone *prz)
 {
-       kfree(prz->old_log);
+       kvfree(prz->old_log);
        prz->old_log = NULL;
        prz->old_log_size = 0;
 }
@@ -519,7 +518,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
        sig ^= PERSISTENT_RAM_SIG;
 
        if (prz->buffer->sig == sig) {
-               if (buffer_size(prz) == 0) {
+               if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
                        pr_debug("found existing empty buffer\n");
                        return 0;
                }
index 391ea40..a7171f5 100644 (file)
@@ -305,8 +305,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
        inode->i_mtime.tv_nsec = 0;
        inode->i_atime.tv_sec   = le32_to_cpu(raw_inode->di_atime);
        inode->i_atime.tv_nsec = 0;
-       inode->i_ctime.tv_sec   = le32_to_cpu(raw_inode->di_ctime);
-       inode->i_ctime.tv_nsec = 0;
+       inode_set_ctime(inode, le32_to_cpu(raw_inode->di_ctime), 0);
        inode->i_blocks  = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size);
 
        memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
index 85b2fa3..21f90d5 100644 (file)
@@ -562,8 +562,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
        inode->i_mtime.tv_nsec = 0;
        inode->i_atime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_atime);
        inode->i_atime.tv_nsec = 0;
-       inode->i_ctime.tv_sec   = fs32_to_cpu(sbi, raw_inode->di_ctime);
-       inode->i_ctime.tv_nsec = 0;
+       inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->di_ctime), 0);
 
        /* calc blocks based on 512 byte blocksize */
        inode->i_blocks = (inode->i_size + 511) >> 9;
index e3e4f40..4d826c3 100644 (file)
@@ -2367,7 +2367,7 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
 
        if (!fmt)
                return -ESRCH;
-       if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
+       if (!sb->dq_op || !sb->s_qcop ||
            (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
                error = -EINVAL;
                goto out_fmt;
index fef477c..18e8387 100644 (file)
@@ -65,7 +65,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
                inode->i_mapping->a_ops = &ram_aops;
                mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
                mapping_set_unevictable(inode->i_mapping);
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                switch (mode & S_IFMT) {
                default:
                        init_special_inode(inode, mode, dev);
@@ -105,7 +105,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
                d_instantiate(dentry, inode);
                dget(dentry);   /* Extra count - pin the dentry in core */
                error = 0;
-               dir->i_mtime = dir->i_ctime = current_time(dir);
+               dir->i_mtime = inode_set_ctime_current(dir);
        }
        return error;
 }
@@ -138,7 +138,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
                if (!error) {
                        d_instantiate(dentry, inode);
                        dget(dentry);
-                       dir->i_mtime = dir->i_ctime = current_time(dir);
+                       dir->i_mtime = inode_set_ctime_current(dir);
                } else
                        iput(inode);
        }
index b07de77..4771701 100644 (file)
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(vfs_setpos);
  * @file:      file structure to seek on
  * @offset:    file offset to seek to
  * @whence:    type of seek
- * @size:      max size of this file in file system
+ * @maxsize:   max size of this file in file system
  * @eof:       offset used for SEEK_END position
  *
  * This is a variant of generic_file_llseek that allows passing in a custom
index b264ce6..c8c46e2 100644 (file)
 #include <asm/unaligned.h>
 
 /*
+ * Some filesystems were never converted to '->iterate_shared()'
+ * and their directory iterators want the inode lock held for
+ * writing. This wrapper allows for converting from the shared
+ * semantics to the exclusive inode use.
+ */
+int wrap_directory_iterator(struct file *file,
+                           struct dir_context *ctx,
+                           int (*iter)(struct file *, struct dir_context *))
+{
+       struct inode *inode = file_inode(file);
+       int ret;
+
+       /*
+        * We'd love to have an 'inode_upgrade_trylock()' operation,
+        * see the comment in mmap_upgrade_trylock() in mm/memory.c.
+        *
+        * But considering this is for "filesystems that never got
+        * converted", it really doesn't matter.
+        *
+        * Also note that since we have to return with the lock held
+        * for reading, we can't use the "killable()" locking here,
+        * since we do need to get the lock even if we're dying.
+        *
+        * We could do the write part killably and then get the read
+        * lock unconditionally if it mattered, but see above on why
+        * this does the very simplistic conversion.
+        */
+       up_read(&inode->i_rwsem);
+       down_write(&inode->i_rwsem);
+
+       /*
+        * Since we dropped the inode lock, we should do the
+        * DEADDIR test again. See 'iterate_dir()' below.
+        *
+        * Note that we don't need to re-do the f_pos games,
+        * since the file must be locked wrt f_pos anyway.
+        */
+       ret = -ENOENT;
+       if (!IS_DEADDIR(inode))
+               ret = iter(file, ctx);
+
+       downgrade_write(&inode->i_rwsem);
+       return ret;
+}
+EXPORT_SYMBOL(wrap_directory_iterator);
+
+/*
  * Note the "unsafe_put_user() semantics: we goto a
  * label for errors.
  */
 int iterate_dir(struct file *file, struct dir_context *ctx)
 {
        struct inode *inode = file_inode(file);
-       bool shared = false;
        int res = -ENOTDIR;
-       if (file->f_op->iterate_shared)
-               shared = true;
-       else if (!file->f_op->iterate)
+
+       if (!file->f_op->iterate_shared)
                goto out;
 
        res = security_file_permission(file, MAY_READ);
        if (res)
                goto out;
 
-       if (shared)
-               res = down_read_killable(&inode->i_rwsem);
-       else
-               res = down_write_killable(&inode->i_rwsem);
+       res = down_read_killable(&inode->i_rwsem);
        if (res)
                goto out;
 
        res = -ENOENT;
        if (!IS_DEADDIR(inode)) {
                ctx->pos = file->f_pos;
-               if (shared)
-                       res = file->f_op->iterate_shared(file, ctx);
-               else
-                       res = file->f_op->iterate(file, ctx);
+               res = file->f_op->iterate_shared(file, ctx);
                file->f_pos = ctx->pos;
                fsnotify_access(file);
                file_accessed(file);
        }
-       if (shared)
-               inode_unlock_shared(inode);
-       else
-               inode_unlock(inode);
+       inode_unlock_shared(inode);
 out:
        return res;
 }
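
With the ->iterate() method removed, a filesystem that still wants exclusive-lock directory iteration uses the WRAP_DIR_ITER() macro, which emits a shared_<name>() shim calling wrap_directory_iterator() above. Roughly, assuming the <linux/fs.h> definition added by this series:

	/* #define WRAP_DIR_ITER(x) \
	 *	static int shared_##x(struct file *file, struct dir_context *ctx) \
	 *	{ return wrap_directory_iterator(file, ctx, x); }
	 */
	WRAP_DIR_ITER(myfs_iterate)		/* emits shared_myfs_iterate() */

	const struct file_operations myfs_dir_operations = {
		.read		= generic_read_dir,
		.iterate_shared	= shared_myfs_iterate,
	};
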
index 77bd3b2..86e55d4 100644 (file)
@@ -1259,9 +1259,8 @@ static void init_inode(struct inode *inode, struct treepath *path)
                inode->i_size = sd_v1_size(sd);
                inode->i_atime.tv_sec = sd_v1_atime(sd);
                inode->i_mtime.tv_sec = sd_v1_mtime(sd);
-               inode->i_ctime.tv_sec = sd_v1_ctime(sd);
+               inode_set_ctime(inode, sd_v1_ctime(sd), 0);
                inode->i_atime.tv_nsec = 0;
-               inode->i_ctime.tv_nsec = 0;
                inode->i_mtime.tv_nsec = 0;
 
                inode->i_blocks = sd_v1_blocks(sd);
@@ -1314,8 +1313,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
                i_gid_write(inode, sd_v2_gid(sd));
                inode->i_mtime.tv_sec = sd_v2_mtime(sd);
                inode->i_atime.tv_sec = sd_v2_atime(sd);
-               inode->i_ctime.tv_sec = sd_v2_ctime(sd);
-               inode->i_ctime.tv_nsec = 0;
+               inode_set_ctime(inode, sd_v2_ctime(sd), 0);
                inode->i_mtime.tv_nsec = 0;
                inode->i_atime.tv_nsec = 0;
                inode->i_blocks = sd_v2_blocks(sd);
@@ -1374,7 +1372,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
        set_sd_v2_gid(sd_v2, i_gid_read(inode));
        set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
        set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
-       set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
+       set_sd_v2_ctime(sd_v2, inode_get_ctime(inode).tv_sec);
        set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
                set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
@@ -1394,7 +1392,7 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
        set_sd_v1_nlink(sd_v1, inode->i_nlink);
        set_sd_v1_size(sd_v1, size);
        set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
-       set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
+       set_sd_v1_ctime(sd_v1, inode_get_ctime(inode).tv_sec);
        set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
 
        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -1986,7 +1984,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
 
        /* uid and gid must already be set by the caller for quota init */
 
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_size = i_size;
        inode->i_blocks = 0;
        inode->i_bytes = 0;
index 6bf9b54..dd33f8c 100644 (file)
@@ -55,7 +55,7 @@ int reiserfs_fileattr_set(struct mnt_idmap *idmap,
        }
        sd_attrs_to_i_attrs(flags, inode);
        REISERFS_I(inode)->i_attrs = flags;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        err = 0;
 unlock:
@@ -107,7 +107,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        err = -EFAULT;
                        goto setversion_out;
                }
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                mark_inode_dirty(inode);
 setversion_out:
                mnt_drop_write_file(filp);
index 479aa4a..015bfe4 100644 (file)
@@ -2326,7 +2326,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
        int i, j;
 
        bh = __getblk(dev, block, bufsize);
-       if (buffer_uptodate(bh))
+       if (!bh || buffer_uptodate(bh))
                return (bh);
 
        if (block + BUFNR > max_block) {
@@ -2336,6 +2336,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
        j = 1;
        for (i = 1; i < blocks; i++) {
                bh = __getblk(dev, block + i, bufsize);
+               if (!bh)
+                       break;
                if (buffer_uptodate(bh)) {
                        brelse(bh);
                        break;
index 52240cc..9c5704b 100644 (file)
@@ -572,7 +572,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
        }
 
        dir->i_size += paste_size;
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        if (!S_ISDIR(inode->i_mode) && visible)
                /* reiserfs_mkdir or reiserfs_rename will do that by itself */
                reiserfs_update_sd(th, dir);
@@ -966,7 +966,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
                               inode->i_nlink);
 
        clear_nlink(inode);
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_to_ts(dir,
+                                            inode_set_ctime_current(inode));
        reiserfs_update_sd(&th, inode);
 
        DEC_DIR_INODE_NLINK(dir)
@@ -1070,11 +1071,11 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
                inc_nlink(inode);
                goto end_unlink;
        }
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        reiserfs_update_sd(&th, inode);
 
        dir->i_size -= (de.de_entrylen + DEH_SIZE);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        reiserfs_update_sd(&th, dir);
 
        if (!savelink)
@@ -1250,7 +1251,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
                return err ? err : retval;
        }
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        reiserfs_update_sd(&th, inode);
 
        ihold(inode);
@@ -1325,7 +1326,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
        int jbegin_count;
        umode_t old_inode_mode;
        unsigned long savelink = 1;
-       struct timespec64 ctime;
 
        if (flags & ~RENAME_NOREPLACE)
                return -EINVAL;
@@ -1576,14 +1576,11 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
 
        mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
        journal_mark_dirty(&th, old_de.de_bh);
-       ctime = current_time(old_dir);
-       old_dir->i_ctime = old_dir->i_mtime = ctime;
-       new_dir->i_ctime = new_dir->i_mtime = ctime;
        /*
         * thanks to Alex Adriaanse <alex_a@caltech.edu> for patch
         * which adds ctime update of renamed object
         */
-       old_inode->i_ctime = ctime;
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
        if (new_dentry_inode) {
                /* adjust link number of the victim */
@@ -1592,7 +1589,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
                } else {
                        drop_nlink(new_dentry_inode);
                }
-               new_dentry_inode->i_ctime = ctime;
                savelink = new_dentry_inode->i_nlink;
        }
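
These reiserfs directory-operation hunks are part of the tree-wide conversion away from direct inode->i_ctime accesses, visible throughout the rest of this section. The helpers are the new VFS accessors; a sketch of the before/after mapping:

    /* before: inode->i_ctime = current_time(inode); */
    inode_set_ctime_current(inode);

    /*
     * before: dir->i_mtime = dir->i_ctime = current_time(dir);
     * inode_set_ctime_current() returns the timestamp it stored, so
     * i_mtime can reuse it in a single expression.
     */
    dir->i_mtime = inode_set_ctime_current(dir);

    /* rename: one helper updates mtime/ctime on both directories and
     * ctime on the renamed (and any replaced) inode. */
    simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);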
 
index ce50039..3676e02 100644 (file)
@@ -2004,7 +2004,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
 
                        if (update_timestamps) {
                                inode->i_mtime = current_time(inode);
-                               inode->i_ctime = current_time(inode);
+                               inode_set_ctime_current(inode);
                        }
                        reiserfs_update_sd(th, inode);
 
@@ -2029,7 +2029,7 @@ update_and_out:
        if (update_timestamps) {
                /* this is truncate, not file closing */
                inode->i_mtime = current_time(inode);
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
        }
        reiserfs_update_sd(th, inode);
 
index 929acce..7eaf36b 100644 (file)
@@ -2587,7 +2587,7 @@ out:
                return err;
        if (inode->i_size < off + len - towrite)
                i_size_write(inode, off + len - towrite);
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        return len - towrite;
 }
index 6510279..6000964 100644 (file)
@@ -466,12 +466,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
 static void update_ctime(struct inode *inode)
 {
        struct timespec64 now = current_time(inode);
+       struct timespec64 ctime = inode_get_ctime(inode);
 
        if (inode_unhashed(inode) || !inode->i_nlink ||
-           timespec64_equal(&inode->i_ctime, &now))
+           timespec64_equal(&ctime, &now))
                return;
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_to_ts(inode, now);
        mark_inode_dirty(inode);
 }
 
index 1380604..0642649 100644 (file)
@@ -285,7 +285,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
        if (error == -ENODATA) {
                error = 0;
                if (type == ACL_TYPE_ACCESS) {
-                       inode->i_ctime = current_time(inode);
+                       inode_set_ctime_current(inode);
                        mark_inode_dirty(inode);
                }
        }
index c59b230..5c35f6c 100644 (file)
@@ -322,8 +322,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
 
        set_nlink(i, 1);                /* Hard to decide.. */
        i->i_size = be32_to_cpu(ri.size);
-       i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
-       i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
+       i->i_mtime = i->i_atime = inode_set_ctime(i, 0, 0);
 
        /* set up mode and ops */
        mode = romfs_modemap[nextfh & ROMFH_TYPE];
@@ -583,16 +582,18 @@ static int romfs_init_fs_context(struct fs_context *fc)
  */
 static void romfs_kill_sb(struct super_block *sb)
 {
+       generic_shutdown_super(sb);
+
 #ifdef CONFIG_ROMFS_ON_MTD
        if (sb->s_mtd) {
-               kill_mtd_super(sb);
-               return;
+               put_mtd_device(sb->s_mtd);
+               sb->s_mtd = NULL;
        }
 #endif
 #ifdef CONFIG_ROMFS_ON_BLOCK
        if (sb->s_bdev) {
-               kill_block_super(sb);
-               return;
+               sync_blockdev(sb->s_bdev);
+               blkdev_put(sb->s_bdev, sb);
        }
 #endif
 }
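
romfs_kill_sb() previously returned early out of kill_mtd_super() or kill_block_super(), each of which both shuts down the superblock and releases its device. The rewrite always runs generic_shutdown_super() first, so outstanding I/O completes while the backing store is still held, and only then drops whichever reference this instance owns. A trimmed sketch of the block-device branch, assuming the 6.5-era blkdev_put(bdev, holder) signature used above:

    generic_shutdown_super(sb);             /* no romfs I/O after this point */
    #ifdef CONFIG_ROMFS_ON_BLOCK
    if (sb->s_bdev) {
            sync_blockdev(sb->s_bdev);      /* flush before releasing */
            blkdev_put(sb->s_bdev, sb);     /* sb was passed as the holder */
    }
    #endif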
index fb4162a..aec6e91 100644 (file)
@@ -153,6 +153,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
                   in_flight(server),
                   atomic_read(&server->in_send),
                   atomic_read(&server->num_waiters));
+#ifdef CONFIG_NET_NS
+       if (server->net)
+               seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
+#endif /* CONFIG_NET_NS */
+
 }
 
 static inline const char *smb_speed_to_str(size_t bps)
@@ -430,10 +435,15 @@ skip_rdma:
                                server->reconnect_instance,
                                server->srv_count,
                                server->sec_mode, in_flight(server));
+#ifdef CONFIG_NET_NS
+               if (server->net)
+                       seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
+#endif /* CONFIG_NET_NS */
 
                seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
                                atomic_read(&server->in_send),
                                atomic_read(&server->num_waiters));
+
                if (server->leaf_fullpath) {
                        seq_printf(m, "\nDFS leaf full path: %s",
                                   server->leaf_fullpath);
index a4d8b0e..6fc8f43 100644 (file)
@@ -1077,7 +1077,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 }
 
 static int
-cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv)
+cifs_setlease(struct file *file, int arg, struct file_lock **lease, void **priv)
 {
        /*
         * Note that this is called by vfs setlease with i_lock held to
index d7274ee..15c8cc4 100644 (file)
@@ -159,6 +159,6 @@ extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
 /* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 43
-#define CIFS_VERSION   "2.43"
+#define SMB3_PRODUCT_BUILD 44
+#define CIFS_VERSION   "2.44"
 #endif                         /* _CIFSFS_H */
index b5808fe..657dee4 100644 (file)
@@ -532,7 +532,7 @@ struct smb_version_operations {
        /* Check for STATUS_IO_TIMEOUT */
        bool (*is_status_io_timeout)(char *buf);
        /* Check for STATUS_NETWORK_NAME_DELETED */
-       void (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
+       bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
 };
 
 struct smb_version_values {
@@ -1062,6 +1062,7 @@ struct cifs_ses {
        unsigned long chans_need_reconnect;
        /* ========= end: protected by chan_lock ======== */
        struct cifs_ses *dfs_root_ses;
+       struct nls_table *local_nls;
 };
 
 static inline bool
index 19f7385..25503f1 100644 (file)
@@ -129,7 +129,7 @@ again:
        }
        spin_unlock(&server->srv_lock);
 
-       nls_codepage = load_nls_default();
+       nls_codepage = ses->local_nls;
 
        /*
         * need to prevent multiple threads trying to simultaneously
@@ -200,7 +200,6 @@ out:
                rc = -EAGAIN;
        }
 
-       unload_nls(nls_codepage);
        return rc;
 }
 
@@ -3184,7 +3183,7 @@ setAclRetry:
        param_offset = offsetof(struct smb_com_transaction2_spi_req,
                                InformationLevel) - 4;
        offset = param_offset + params;
-       parm_data = ((char *) &pSMB->hdr.Protocol) + offset;
+       parm_data = ((char *)pSMB) + sizeof(pSMB->hdr.smb_buf_length) + offset;
        pSMB->ParameterOffset = cpu_to_le16(param_offset);
 
        /* convert to on the wire format for POSIX ACL */
index 85dd1b3..238538d 100644 (file)
@@ -60,7 +60,7 @@ extern bool disable_legacy_dialects;
 #define TLINK_IDLE_EXPIRE      (600 * HZ)
 
 /* Drop the connection to not overload the server */
-#define NUM_STATUS_IO_TIMEOUT   5
+#define MAX_STATUS_IO_TIMEOUT   5
 
 static int ip_connect(struct TCP_Server_Info *server);
 static int generic_ip_connect(struct TCP_Server_Info *server);
@@ -1117,6 +1117,7 @@ cifs_demultiplex_thread(void *p)
        struct mid_q_entry *mids[MAX_COMPOUND];
        char *bufs[MAX_COMPOUND];
        unsigned int noreclaim_flag, num_io_timeout = 0;
+       bool pending_reconnect = false;
 
        noreclaim_flag = memalloc_noreclaim_save();
        cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
@@ -1156,6 +1157,8 @@ cifs_demultiplex_thread(void *p)
                cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
                if (!is_smb_response(server, buf[0]))
                        continue;
+
+               pending_reconnect = false;
 next_pdu:
                server->pdu_size = pdu_length;
 
@@ -1213,10 +1216,13 @@ next_pdu:
                if (server->ops->is_status_io_timeout &&
                    server->ops->is_status_io_timeout(buf)) {
                        num_io_timeout++;
-                       if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) {
-                               cifs_reconnect(server, false);
+                       if (num_io_timeout > MAX_STATUS_IO_TIMEOUT) {
+                               cifs_server_dbg(VFS,
+                                               "Number of request timeouts exceeded %d. Reconnecting",
+                                               MAX_STATUS_IO_TIMEOUT);
+
+                               pending_reconnect = true;
                                num_io_timeout = 0;
-                               continue;
                        }
                }
 
@@ -1226,9 +1232,14 @@ next_pdu:
                        if (mids[i] != NULL) {
                                mids[i]->resp_buf_size = server->pdu_size;
 
-                               if (bufs[i] && server->ops->is_network_name_deleted)
-                                       server->ops->is_network_name_deleted(bufs[i],
-                                                                       server);
+                               if (bufs[i] != NULL) {
+                                       if (server->ops->is_network_name_deleted &&
+                                           server->ops->is_network_name_deleted(bufs[i],
+                                                                                server)) {
+                                               cifs_server_dbg(FYI,
+                                                               "Share deleted. Reconnect needed");
+                                       }
+                               }
 
                                if (!mids[i]->multiRsp || mids[i]->multiEnd)
                                        mids[i]->callback(mids[i]);
@@ -1263,6 +1274,11 @@ next_pdu:
                        buf = server->smallbuf;
                        goto next_pdu;
                }
+
+               /* do this reconnect at the very end after processing all MIDs */
+               if (pending_reconnect)
+                       cifs_reconnect(server, true);
+
        } /* end while !EXITING */
 
        /* buffer usually freed in free_mid - need to free it here on exit */
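
The demultiplex-thread hunks replace an immediate cifs_reconnect() at the timeout threshold with a flag that is only acted on after every MID in the received PDU has been dispatched; reconnecting mid-batch could invalidate responses still being processed. Condensed, with MAX_STATUS_IO_TIMEOUT, pending_reconnect and the mids[] dispatch as above:

    if (num_io_timeout > MAX_STATUS_IO_TIMEOUT) {
            pending_reconnect = true;       /* remember it, don't act yet */
            num_io_timeout = 0;
    }

    /* ... dispatch mids[i]->callback() for the whole compound ... */

    if (pending_reconnect)
            cifs_reconnect(server, true);   /* all received responses dispatched */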
@@ -1826,6 +1842,10 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
                            CIFS_MAX_PASSWORD_LEN))
                        return 0;
        }
+
+       if (strcmp(ctx->local_nls->charset, ses->local_nls->charset))
+               return 0;
+
        return 1;
 }
 
@@ -2270,6 +2290,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 
        ses->sectype = ctx->sectype;
        ses->sign = ctx->sign;
+       ses->local_nls = load_nls(ctx->local_nls->charset);
 
        /* add server as first channel */
        spin_lock(&ses->chan_lock);
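
Sessions now pin the mount's charset: cifs_get_smb_ses() loads it once, the reconnect paths (here and in the second reconnect hunk further down) borrow ses->local_nls instead of calling load_nls_default() per request, and sesInfoFree() releases it. match_session() also refuses to share a session between mounts with different charsets. Sketch of the lifetime, fields as in the hunks:

    /* session creation */
    ses->local_nls = load_nls(ctx->local_nls->charset);

    /* reconnect paths: borrow, never load/unload per call */
    nls_codepage = ses->local_nls;

    /* session teardown, in sesInfoFree() */
    unload_nls(ses->local_nls);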
index 1403a2d..ee772c3 100644 (file)
@@ -66,6 +66,12 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
        return rc;
 }
 
+/*
+ * Track individual DFS referral servers used by new DFS mount.
+ *
+ * On success, their lifetime will be shared by final tcon (dfs_ses_list).
+ * Otherwise, they will be put by dfs_put_root_smb_sessions() in cifs_mount().
+ */
 static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
 {
        struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
@@ -80,11 +86,12 @@ static int add_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
                INIT_LIST_HEAD(&root_ses->list);
 
                spin_lock(&cifs_tcp_ses_lock);
-               ses->ses_count++;
+               cifs_smb_ses_inc_refcount(ses);
                spin_unlock(&cifs_tcp_ses_lock);
                root_ses->ses = ses;
                list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
        }
+       /* Select new DFS referral server so that new referrals go through it */
        ctx->dfs_root_ses = ses;
        return 0;
 }
@@ -170,8 +177,12 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
                struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
 
                rc = dfs_get_referral(mnt_ctx, ref_path + 1, NULL, &tl);
-               if (rc)
+               if (rc) {
+                       rc = cifs_mount_get_tcon(mnt_ctx);
+                       if (!rc)
+                               rc = cifs_is_path_remote(mnt_ctx);
                        break;
+               }
 
                tit = dfs_cache_get_tgt_iterator(&tl);
                if (!tit) {
@@ -242,7 +253,6 @@ out:
 int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
 {
        struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
-       struct cifs_ses *ses;
        bool nodfs = ctx->nodfs;
        int rc;
 
@@ -276,20 +286,8 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
        }
 
        *isdfs = true;
-       /*
-        * Prevent DFS root session of being put in the first call to
-        * cifs_mount_put_conns().  If another DFS root server was not found
-        * while chasing the referrals (@ctx->dfs_root_ses == @ses), then we
-        * can safely put extra refcount of @ses.
-        */
-       ses = mnt_ctx->ses;
-       mnt_ctx->ses = NULL;
-       mnt_ctx->server = NULL;
-       rc = __dfs_mount_share(mnt_ctx);
-       if (ses == ctx->dfs_root_ses)
-               cifs_put_smb_ses(ses);
-
-       return rc;
+       add_root_smb_session(mnt_ctx);
+       return __dfs_mount_share(mnt_ctx);
 }
 
 /* Update dfs referral path of superblock */
index 879bc8e..2108b3b 100644 (file)
@@ -1080,12 +1080,12 @@ int cifs_close(struct inode *inode, struct file *file)
                cfile = file->private_data;
                file->private_data = NULL;
                dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
-               if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
-                   cinode->lease_granted &&
+               if ((cifs_sb->ctx->closetimeo && cinode->oplock == CIFS_CACHE_RHW_FLG)
+                   && cinode->lease_granted &&
                    !test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
                    dclose) {
                        if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
-                               inode->i_ctime = inode->i_mtime = current_time(inode);
+                               inode->i_mtime = inode_set_ctime_current(inode);
                        }
                        spin_lock(&cinode->deferred_lock);
                        cifs_add_deferred_close(cfile, dclose);
@@ -2596,7 +2596,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
                                           write_data, to - from, &offset);
                cifsFileInfo_put(open_file);
                /* Does mm or vfs already set times? */
-               inode->i_atime = inode->i_mtime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
                if ((bytes_written > 0) && (offset))
                        rc = 0;
                else if (bytes_written < 0)
@@ -4681,9 +4681,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 
 io_error:
        kunmap(page);
-       unlock_page(page);
 
 read_complete:
+       unlock_page(page);
        return rc;
 }
 
@@ -4878,9 +4878,11 @@ void cifs_oplock_break(struct work_struct *work)
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
                                                  oplock_break);
        struct inode *inode = d_inode(cfile->dentry);
+       struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifsInodeInfo *cinode = CIFS_I(inode);
-       struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
-       struct TCP_Server_Info *server = tcon->ses->server;
+       struct cifs_tcon *tcon;
+       struct TCP_Server_Info *server;
+       struct tcon_link *tlink;
        int rc = 0;
        bool purge_cache = false, oplock_break_cancelled;
        __u64 persistent_fid, volatile_fid;
@@ -4889,6 +4891,12 @@ void cifs_oplock_break(struct work_struct *work)
        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
                        TASK_UNINTERRUPTIBLE);
 
+       tlink = cifs_sb_tlink(cifs_sb);
+       if (IS_ERR(tlink))
+               goto out;
+       tcon = tlink_tcon(tlink);
+       server = tcon->ses->server;
+
        server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
                                      cfile->oplock_epoch, &purge_cache);
 
@@ -4938,18 +4946,19 @@ oplock_break_ack:
        /*
         * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
         * an acknowledgment to be sent when the file has already been closed.
-        * check for server null, since can race with kill_sb calling tree disconnect.
         */
        spin_lock(&cinode->open_file_lock);
-       if (tcon->ses && tcon->ses->server && !oplock_break_cancelled &&
-                                       !list_empty(&cinode->openFileList)) {
+       /* check list empty since can race with kill_sb calling tree disconnect */
+       if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
                spin_unlock(&cinode->open_file_lock);
-               rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid,
-                                               volatile_fid, net_fid, cinode);
+               rc = server->ops->oplock_response(tcon, persistent_fid,
+                                                 volatile_fid, net_fid, cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        } else
                spin_unlock(&cinode->open_file_lock);
 
+       cifs_put_tlink(tlink);
+out:
        cifs_done_oplock_break(cinode);
 }
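
cifs_oplock_break() used to reach the server through cfile->tlink, a cached pointer that can race with kill_sb's tree disconnect; hence the old "check for server null" comment. The rewrite (and the two ioctl hunks further down) instead borrows a counted tcon reference through the superblock for the duration of the work item:

    tlink = cifs_sb_tlink(cifs_sb);
    if (IS_ERR(tlink))
            goto out;                       /* the mount is going away */
    tcon = tlink_tcon(tlink);
    server = tcon->ses->server;             /* stable while tlink is held */

    /* ... downgrade the oplock, send the oplock response ... */

    cifs_put_tlink(tlink);                  /* drop the borrowed reference */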
 
index 4946a0c..67e16c2 100644 (file)
@@ -231,6 +231,8 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
                break;
        case Opt_sec_none:
                ctx->nullauth = 1;
+               kfree(ctx->username);
+               ctx->username = NULL;
                break;
        default:
                cifs_errorf(fc, "bad security option: %s\n", value);
@@ -1201,6 +1203,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
        case Opt_user:
                kfree(ctx->username);
                ctx->username = NULL;
+               if (ctx->nullauth)
+                       break;
                if (strlen(param->string) == 0) {
                        /* null user, ie. anonymous authentication */
                        ctx->nullauth = 1;
index 1739996..84f3b09 100644 (file)
@@ -50,12 +50,13 @@ void cifs_fscache_fill_coherency(struct inode *inode,
                                 struct cifs_fscache_inode_coherency_data *cd)
 {
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
+       struct timespec64 ctime = inode_get_ctime(inode);
 
        memset(cd, 0, sizeof(*cd));
        cd->last_write_time_sec   = cpu_to_le64(cifsi->netfs.inode.i_mtime.tv_sec);
        cd->last_write_time_nsec  = cpu_to_le32(cifsi->netfs.inode.i_mtime.tv_nsec);
-       cd->last_change_time_sec  = cpu_to_le64(cifsi->netfs.inode.i_ctime.tv_sec);
-       cd->last_change_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_ctime.tv_nsec);
+       cd->last_change_time_sec  = cpu_to_le64(ctime.tv_sec);
+       cd->last_change_time_nsec = cpu_to_le32(ctime.tv_nsec);
 }
 
 
index c3eeae0..93fe437 100644 (file)
@@ -172,7 +172,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
        else
                inode->i_atime = fattr->cf_atime;
        inode->i_mtime = fattr->cf_mtime;
-       inode->i_ctime = fattr->cf_ctime;
+       inode_set_ctime_to_ts(inode, fattr->cf_ctime);
        inode->i_rdev = fattr->cf_rdev;
        cifs_nlink_fattr_to_inode(inode, fattr);
        inode->i_uid = fattr->cf_uid;
@@ -1744,9 +1744,9 @@ out_reval:
                cifs_inode = CIFS_I(inode);
                cifs_inode->time = 0;   /* will force revalidate to get info
                                           when needed */
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
        }
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        cifs_inode = CIFS_I(dir);
        CIFS_I(dir)->time = 0;  /* force revalidate of dir as well */
 unlink_out:
@@ -2060,8 +2060,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
         */
        cifsInode->time = 0;
 
-       d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime =
-               current_time(inode);
+       inode_set_ctime_current(d_inode(direntry));
+       inode->i_mtime = inode_set_ctime_current(inode);
 
 rmdir_exit:
        free_dentry_path(page);
@@ -2267,8 +2267,8 @@ unlink_target:
        /* force revalidate to go get info when needed */
        CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
 
-       source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
-               target_dir->i_mtime = current_time(source_dir);
+       source_dir->i_mtime = target_dir->i_mtime = inode_set_ctime_to_ts(source_dir,
+                                                                         inode_set_ctime_current(target_dir));
 
 cifs_rename_exit:
        kfree(info_buf_source);
@@ -2540,7 +2540,7 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path,
                        return rc;
        }
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        stat->blksize = cifs_sb->ctx->bsize;
        stat->ino = CIFS_I(inode)->uniqueid;
 
index fff092b..f716000 100644 (file)
@@ -433,16 +433,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                         * Dump encryption keys. This is an old ioctl that only
                         * handles AES-128-{CCM,GCM}.
                         */
-                       if (pSMBFile == NULL)
-                               break;
                        if (!capable(CAP_SYS_ADMIN)) {
                                rc = -EACCES;
                                break;
                        }
 
-                       tcon = tlink_tcon(pSMBFile->tlink);
+                       cifs_sb = CIFS_SB(inode->i_sb);
+                       tlink = cifs_sb_tlink(cifs_sb);
+                       if (IS_ERR(tlink)) {
+                               rc = PTR_ERR(tlink);
+                               break;
+                       }
+                       tcon = tlink_tcon(tlink);
                        if (!smb3_encryption_required(tcon)) {
                                rc = -EOPNOTSUPP;
+                               cifs_put_tlink(tlink);
                                break;
                        }
                        pkey_inf.cipher_type =
@@ -459,6 +464,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                                rc = -EFAULT;
                        else
                                rc = 0;
+                       cifs_put_tlink(tlink);
                        break;
                case CIFS_DUMP_FULL_KEY:
                        /*
@@ -470,8 +476,16 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                                rc = -EACCES;
                                break;
                        }
-                       tcon = tlink_tcon(pSMBFile->tlink);
+                       cifs_sb = CIFS_SB(inode->i_sb);
+                       tlink = cifs_sb_tlink(cifs_sb);
+                       if (IS_ERR(tlink)) {
+                               rc = PTR_ERR(tlink);
+                               break;
+                       }
+
+                       tcon = tlink_tcon(tlink);
                        rc = cifs_dump_full_key(tcon, (void __user *)arg);
+                       cifs_put_tlink(tlink);
                        break;
                case CIFS_IOC_NOTIFY:
                        if (!S_ISDIR(inode->i_mode)) {
index 70dbfe6..d7e85d9 100644 (file)
@@ -95,6 +95,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
                return;
        }
 
+       unload_nls(buf_to_free->local_nls);
        atomic_dec(&sesInfoAllocCount);
        kfree(buf_to_free->serverOS);
        kfree(buf_to_free->serverDomain);
index 335c078..c57ca20 100644 (file)
@@ -1013,6 +1013,7 @@ setup_ntlm_smb3_neg_ret:
 }
 
 
+/* See MS-NLMP 2.2.1.3 */
 int build_ntlmssp_auth_blob(unsigned char **pbuffer,
                                        u16 *buflen,
                                   struct cifs_ses *ses,
@@ -1047,7 +1048,8 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
 
        flags = ses->ntlmssp->server_flags | NTLMSSP_REQUEST_TARGET |
                NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
-
+       /* we only send version information in ntlmssp negotiate, so do not set this flag */
+       flags = flags & ~NTLMSSP_NEGOTIATE_VERSION;
        tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
        sec_blob->NegotiateFlags = cpu_to_le32(flags);
 
index 87abce0..182e2e8 100644 (file)
@@ -1396,7 +1396,8 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
        if (file_inf.LastWriteTime)
                inode->i_mtime = cifs_NTtimeToUnix(file_inf.LastWriteTime);
        if (file_inf.ChangeTime)
-               inode->i_ctime = cifs_NTtimeToUnix(file_inf.ChangeTime);
+               inode_set_ctime_to_ts(inode,
+                                     cifs_NTtimeToUnix(file_inf.ChangeTime));
        if (file_inf.LastAccessTime)
                inode->i_atime = cifs_NTtimeToUnix(file_inf.LastAccessTime);
 
@@ -2395,7 +2396,7 @@ smb2_is_status_io_timeout(char *buf)
                return false;
 }
 
-static void
+static bool
 smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
 {
        struct smb2_hdr *shdr = (struct smb2_hdr *)buf;
@@ -2404,7 +2405,7 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
        struct cifs_tcon *tcon;
 
        if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
-               return;
+               return false;
 
        /* If server is a channel, select the primary channel */
        pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server;
@@ -2419,11 +2420,13 @@ smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
                                spin_unlock(&cifs_tcp_ses_lock);
                                pr_warn_once("Server share %s deleted.\n",
                                             tcon->tree_name);
-                               return;
+                               return true;
                        }
                }
        }
        spin_unlock(&cifs_tcp_ses_lock);
+
+       return false;
 }
 
 static int
index e04766f..a457f07 100644 (file)
@@ -242,7 +242,7 @@ again:
        }
        spin_unlock(&server->srv_lock);
 
-       nls_codepage = load_nls_default();
+       nls_codepage = ses->local_nls;
 
        /*
         * need to prevent multiple threads trying to simultaneously
@@ -324,7 +324,6 @@ out:
                rc = -EAGAIN;
        }
 failed:
-       unload_nls(nls_codepage);
        return rc;
 }
 
index c6db898..7676091 100644 (file)
@@ -160,7 +160,7 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
                        spin_unlock(&ses->ses_lock);
                        continue;
                }
-               ++ses->ses_count;
+               cifs_smb_ses_inc_refcount(ses);
                spin_unlock(&ses->ses_lock);
                return ses;
        }
index fb8b2d5..b7521e4 100644 (file)
@@ -352,7 +352,8 @@ enum KSMBD_TREE_CONN_STATUS {
 #define KSMBD_SHARE_FLAG_STREAMS               BIT(11)
 #define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS       BIT(12)
 #define KSMBD_SHARE_FLAG_ACL_XATTR             BIT(13)
-#define KSMBD_SHARE_FLAG_UPDATE                BIT(14)
+#define KSMBD_SHARE_FLAG_UPDATE                        BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT              BIT(15)
 
 /*
  * Tree connect request flags.
index ced7a9e..9df121b 100644 (file)
@@ -286,6 +286,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
 static int queue_ksmbd_work(struct ksmbd_conn *conn)
 {
        struct ksmbd_work *work;
+       int err;
 
        work = ksmbd_alloc_work_struct();
        if (!work) {
@@ -297,7 +298,11 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
        work->request_buf = conn->request_buf;
        conn->request_buf = NULL;
 
-       ksmbd_init_smb_server(work);
+       err = ksmbd_init_smb_server(work);
+       if (err) {
+               ksmbd_free_work_struct(work);
+               return 0;
+       }
 
        ksmbd_conn_enqueue_request(work);
        atomic_inc(&conn->r_count);
index 33b7e6c..e881df1 100644 (file)
@@ -380,13 +380,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
        }
 
        if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
-               if (command == SMB2_OPLOCK_BREAK_HE &&
-                   le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
-                   le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+               if (!(command == SMB2_OPLOCK_BREAK_HE &&
+                   (le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_20 ||
+                   le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_21))) {
                        /* special case for SMB2.1 lease break message */
                        ksmbd_debug(SMB,
-                                   "Illegal request size %d for oplock break\n",
-                                   le16_to_cpu(pdu->StructureSize2));
+                               "Illegal request size %u for command %d\n",
+                               le16_to_cpu(pdu->StructureSize2), command);
                        return 1;
                }
        }
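
The old test only flagged an error when the command was an oplock break whose StructureSize2 matched neither legal lease-break encoding; any other command with a wrong StructureSize2 sailed through. De Morgan'd into an allow-list, with sz as shorthand for le16_to_cpu(pdu->StructureSize2):

    bool lease_break_ok = command == SMB2_OPLOCK_BREAK_HE &&
                          (sz == OP_BREAK_STRUCT_SIZE_20 ||
                           sz == OP_BREAK_STRUCT_SIZE_21);

    if (!lease_break_ok)
            return 1;       /* wrong StructureSize2 for this command */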
index cf88221..a947c18 100644 (file)
@@ -87,9 +87,9 @@ struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn
  */
 int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
 {
-       struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+       struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
        unsigned int cmd = le16_to_cpu(req_hdr->Command);
-       int tree_id;
+       unsigned int tree_id;
 
        if (cmd == SMB2_TREE_CONNECT_HE ||
            cmd ==  SMB2_CANCEL_HE ||
@@ -114,7 +114,7 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
                        pr_err("The first operation in the compound does not have tcon\n");
                        return -EINVAL;
                }
-               if (work->tcon->id != tree_id) {
+               if (tree_id != UINT_MAX && work->tcon->id != tree_id) {
                        pr_err("tree id(%u) is different with id(%u) in first operation\n",
                                        tree_id, work->tcon->id);
                        return -EINVAL;
@@ -559,9 +559,9 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
  */
 int smb2_check_user_session(struct ksmbd_work *work)
 {
-       struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+       struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
        struct ksmbd_conn *conn = work->conn;
-       unsigned int cmd = conn->ops->get_cmd_val(work);
+       unsigned int cmd = le16_to_cpu(req_hdr->Command);
        unsigned long long sess_id;
 
        /*
@@ -587,7 +587,7 @@ int smb2_check_user_session(struct ksmbd_work *work)
                        pr_err("The first operation in the compound does not have sess\n");
                        return -EINVAL;
                }
-               if (work->sess->id != sess_id) {
+               if (sess_id != ULLONG_MAX && work->sess->id != sess_id) {
                        pr_err("session id(%llu) is different with the first operation(%lld)\n",
                                        sess_id, work->sess->id);
                        return -EINVAL;
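
Per MS-SMB2 3.2.4.1.4, a related request inside a compound chain carries TreeId 0xFFFFFFFF and SessionId 0xFFFFFFFFFFFFFFFF, meaning "inherit from the previous command"; the old checks compared those sentinels against the real ids and rejected valid chains. Corrected:

    /* smb2_get_ksmbd_tcon(): UINT_MAX means "inherit the previous tcon" */
    if (tree_id != UINT_MAX && work->tcon->id != tree_id)
            return -EINVAL;

    /* smb2_check_user_session(): ULLONG_MAX likewise for the session */
    if (sess_id != ULLONG_MAX && work->sess->id != sess_id)
            return -EINVAL;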
@@ -2324,9 +2324,16 @@ next:
                        break;
                buf_len -= next;
                eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
-               if (next < (u32)eabuf->EaNameLength + le16_to_cpu(eabuf->EaValueLength))
+               if (buf_len < sizeof(struct smb2_ea_info)) {
+                       rc = -EINVAL;
                        break;
+               }
 
+               if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
+                               le16_to_cpu(eabuf->EaValueLength)) {
+                       rc = -EINVAL;
+                       break;
+               }
        } while (next != 0);
 
        kfree(attr_name);
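
Both added checks follow the standard rule for chained, client-supplied records: verify the fixed header fits in what remains of the buffer before reading it, then verify the lengths it declares also fit, before anything dereferences them. Restated with early returns in place of the loop's rc/break handling:

    buf_len -= next;
    eabuf = (struct smb2_ea_info *)((char *)eabuf + next);

    if (buf_len < sizeof(struct smb2_ea_info))
            return -EINVAL;         /* fixed header would overrun the buffer */

    if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
                    le16_to_cpu(eabuf->EaValueLength))
            return -EINVAL;         /* declared name/value would overrun it */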
@@ -2467,8 +2474,9 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
        }
 }
 
-static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
-                     int open_flags, umode_t posix_mode, bool is_dir)
+static int smb2_creat(struct ksmbd_work *work, struct path *parent_path,
+                     struct path *path, char *name, int open_flags,
+                     umode_t posix_mode, bool is_dir)
 {
        struct ksmbd_tree_connect *tcon = work->tcon;
        struct ksmbd_share_config *share = tcon->share_conf;
@@ -2495,7 +2503,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
                        return rc;
        }
 
-       rc = ksmbd_vfs_kern_path_locked(work, name, 0, path, 0);
+       rc = ksmbd_vfs_kern_path_locked(work, name, 0, parent_path, path, 0);
        if (rc) {
                pr_err("cannot get linux path (%s), err = %d\n",
                       name, rc);
@@ -2565,7 +2573,7 @@ int smb2_open(struct ksmbd_work *work)
        struct ksmbd_tree_connect *tcon = work->tcon;
        struct smb2_create_req *req;
        struct smb2_create_rsp *rsp;
-       struct path path;
+       struct path path, parent_path;
        struct ksmbd_share_config *share = tcon->share_conf;
        struct ksmbd_file *fp = NULL;
        struct file *filp = NULL;
@@ -2786,7 +2794,8 @@ int smb2_open(struct ksmbd_work *work)
                goto err_out1;
        }
 
-       rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, &path, 1);
+       rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
+                                       &parent_path, &path, 1);
        if (!rc) {
                file_present = true;
 
@@ -2906,7 +2915,8 @@ int smb2_open(struct ksmbd_work *work)
 
        /*create file if not present */
        if (!file_present) {
-               rc = smb2_creat(work, &path, name, open_flags, posix_mode,
+               rc = smb2_creat(work, &parent_path, &path, name, open_flags,
+                               posix_mode,
                                req->CreateOptions & FILE_DIRECTORY_FILE_LE);
                if (rc) {
                        if (rc == -ENOENT) {
@@ -3321,8 +3331,9 @@ int smb2_open(struct ksmbd_work *work)
 
 err_out:
        if (file_present || created) {
-               inode_unlock(d_inode(path.dentry->d_parent));
-               dput(path.dentry);
+               inode_unlock(d_inode(parent_path.dentry));
+               path_put(&path);
+               path_put(&parent_path);
        }
        ksmbd_revert_fsids(work);
 err_out1:
@@ -4391,8 +4402,8 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
        }
 
        basic_info = (struct smb2_file_basic_info *)rsp->Buffer;
-       generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-                        &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+                        file_inode(fp->filp), &stat);
        basic_info->CreationTime = cpu_to_le64(fp->create_time);
        time = ksmbd_UnixTimeToNT(stat.atime);
        basic_info->LastAccessTime = cpu_to_le64(time);
@@ -4417,7 +4428,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
        struct kstat stat;
 
        inode = file_inode(fp->filp);
-       generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
 
        sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
        delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4471,7 +4482,7 @@ static int get_file_all_info(struct ksmbd_work *work,
                return PTR_ERR(filename);
 
        inode = file_inode(fp->filp);
-       generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
 
        ksmbd_debug(SMB, "filename = %s\n", filename);
        delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4548,8 +4559,8 @@ static void get_file_stream_info(struct ksmbd_work *work,
        int buf_free_len;
        struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
 
-       generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-                        &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+                        file_inode(fp->filp), &stat);
        file_info = (struct smb2_file_stream_info *)rsp->Buffer;
 
        buf_free_len =
@@ -4639,8 +4650,8 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
        struct smb2_file_internal_info *file_info;
        struct kstat stat;
 
-       generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-                        &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+                        file_inode(fp->filp), &stat);
        file_info = (struct smb2_file_internal_info *)rsp->Buffer;
        file_info->IndexNumber = cpu_to_le64(stat.ino);
        rsp->OutputBufferLength =
@@ -4665,7 +4676,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
        file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
 
        inode = file_inode(fp->filp);
-       generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
 
        file_info->CreationTime = cpu_to_le64(fp->create_time);
        time = ksmbd_UnixTimeToNT(stat.atime);
@@ -4726,8 +4737,8 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
        struct smb2_file_comp_info *file_info;
        struct kstat stat;
 
-       generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
-                        &stat);
+       generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+                        file_inode(fp->filp), &stat);
 
        file_info = (struct smb2_file_comp_info *)rsp->Buffer;
        file_info->CompressedFileSize = cpu_to_le64(stat.blocks << 9);
@@ -4779,7 +4790,7 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
        file_info->LastAccessTime = cpu_to_le64(time);
        time = ksmbd_UnixTimeToNT(inode->i_mtime);
        file_info->LastWriteTime = cpu_to_le64(time);
-       time = ksmbd_UnixTimeToNT(inode->i_ctime);
+       time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
        file_info->ChangeTime = cpu_to_le64(time);
        file_info->DosAttributes = fp->f_ci->m_fattr;
        file_info->Inode = cpu_to_le64(inode->i_ino);
@@ -5422,7 +5433,7 @@ int smb2_close(struct ksmbd_work *work)
                rsp->LastAccessTime = cpu_to_le64(time);
                time = ksmbd_UnixTimeToNT(inode->i_mtime);
                rsp->LastWriteTime = cpu_to_le64(time);
-               time = ksmbd_UnixTimeToNT(inode->i_ctime);
+               time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
                rsp->ChangeTime = cpu_to_le64(time);
                ksmbd_fd_put(work, fp);
        } else {
@@ -5545,7 +5556,7 @@ static int smb2_create_link(struct ksmbd_work *work,
                            struct nls_table *local_nls)
 {
        char *link_name = NULL, *target_name = NULL, *pathname = NULL;
-       struct path path;
+       struct path path, parent_path;
        bool file_present = false;
        int rc;
 
@@ -5575,7 +5586,7 @@ static int smb2_create_link(struct ksmbd_work *work,
 
        ksmbd_debug(SMB, "target name is %s\n", target_name);
        rc = ksmbd_vfs_kern_path_locked(work, link_name, LOOKUP_NO_SYMLINKS,
-                                       &path, 0);
+                                       &parent_path, &path, 0);
        if (rc) {
                if (rc != -ENOENT)
                        goto out;
@@ -5605,8 +5616,9 @@ static int smb2_create_link(struct ksmbd_work *work,
                rc = -EINVAL;
 out:
        if (file_present) {
-               inode_unlock(d_inode(path.dentry->d_parent));
+               inode_unlock(d_inode(parent_path.dentry));
                path_put(&path);
+               path_put(&parent_path);
        }
        if (!IS_ERR(link_name))
                kfree(link_name);
@@ -5644,7 +5656,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
        if (file_info->ChangeTime)
                attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
        else
-               attrs.ia_ctime = inode->i_ctime;
+               attrs.ia_ctime = inode_get_ctime(inode);
 
        if (file_info->LastWriteTime) {
                attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime);
@@ -5689,7 +5701,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
                        return -EACCES;
 
                inode_lock(inode);
-               inode->i_ctime = attrs.ia_ctime;
+               inode_set_ctime_to_ts(inode, attrs.ia_ctime);
                attrs.ia_valid &= ~ATTR_CTIME;
                rc = notify_change(idmap, dentry, &attrs, NULL);
                inode_unlock(inode);
@@ -6209,6 +6221,11 @@ int smb2_read(struct ksmbd_work *work)
        unsigned int max_read_size = conn->vals->max_read_size;
 
        WORK_BUFFERS(work, req, rsp);
+       if (work->next_smb2_rcv_hdr_off) {
+               work->send_no_response = 1;
+               err = -EOPNOTSUPP;
+               goto out;
+       }
 
        if (test_share_config_flag(work->tcon->share_conf,
                                   KSMBD_SHARE_FLAG_PIPE)) {
@@ -8609,7 +8626,8 @@ int smb3_decrypt_req(struct ksmbd_work *work)
        struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf);
        int rc = 0;
 
-       if (buf_data_size < sizeof(struct smb2_hdr)) {
+       if (pdu_length < sizeof(struct smb2_transform_hdr) ||
+           buf_data_size < sizeof(struct smb2_hdr)) {
                pr_err("Transform message is too small (%u)\n",
                       pdu_length);
                return -ECONNABORTED;
index ef20f63..c2b75d8 100644 (file)
@@ -388,26 +388,29 @@ static struct smb_version_cmds smb1_server_cmds[1] = {
        [SMB_COM_NEGOTIATE_EX]  = { .proc = smb1_negotiate, },
 };
 
-static void init_smb1_server(struct ksmbd_conn *conn)
+static int init_smb1_server(struct ksmbd_conn *conn)
 {
        conn->ops = &smb1_server_ops;
        conn->cmds = smb1_server_cmds;
        conn->max_cmds = ARRAY_SIZE(smb1_server_cmds);
+       return 0;
 }
 
-void ksmbd_init_smb_server(struct ksmbd_work *work)
+int ksmbd_init_smb_server(struct ksmbd_work *work)
 {
        struct ksmbd_conn *conn = work->conn;
        __le32 proto;
 
-       if (conn->need_neg == false)
-               return;
-
        proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol;
+       if (conn->need_neg == false) {
+               if (proto == SMB1_PROTO_NUMBER)
+                       return -EINVAL;
+               return 0;
+       }
+
        if (proto == SMB1_PROTO_NUMBER)
-               init_smb1_server(conn);
-       else
-               init_smb3_11_server(conn);
+               return init_smb1_server(conn);
+       return init_smb3_11_server(conn);
 }
 
 int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
index aeca0f4..f109251 100644 (file)
@@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn);
 
 int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
 
-void ksmbd_init_smb_server(struct ksmbd_work *work);
+int ksmbd_init_smb_server(struct ksmbd_work *work);
 
 struct ksmbd_kstat;
 int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
index e359144..d48756a 100644 (file)
@@ -63,13 +63,13 @@ int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
 
 static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
                                        char *pathname, unsigned int flags,
+                                       struct path *parent_path,
                                        struct path *path)
 {
        struct qstr last;
        struct filename *filename;
        struct path *root_share_path = &share_conf->vfs_path;
        int err, type;
-       struct path parent_path;
        struct dentry *d;
 
        if (pathname[0] == '\0') {
@@ -84,7 +84,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
                return PTR_ERR(filename);
 
        err = vfs_path_parent_lookup(filename, flags,
-                                    &parent_path, &last, &type,
+                                    parent_path, &last, &type,
                                     root_share_path);
        if (err) {
                putname(filename);
@@ -92,13 +92,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
        }
 
        if (unlikely(type != LAST_NORM)) {
-               path_put(&parent_path);
+               path_put(parent_path);
                putname(filename);
                return -ENOENT;
        }
 
-       inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
-       d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
+       inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
+       d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
        if (IS_ERR(d))
                goto err_out;
 
@@ -108,15 +108,22 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
        }
 
        path->dentry = d;
-       path->mnt = share_conf->vfs_path.mnt;
-       path_put(&parent_path);
-       putname(filename);
+       path->mnt = mntget(parent_path->mnt);
 
+       if (test_share_config_flag(share_conf, KSMBD_SHARE_FLAG_CROSSMNT)) {
+               err = follow_down(path, 0);
+               if (err < 0) {
+                       path_put(path);
+                       goto err_out;
+               }
+       }
+
+       putname(filename);
        return 0;
 
 err_out:
-       inode_unlock(parent_path.dentry->d_inode);
-       path_put(&parent_path);
+       inode_unlock(d_inode(parent_path->dentry));
+       path_put(parent_path);
        putname(filename);
        return -ENOENT;
 }
@@ -412,7 +419,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
 {
        char *stream_buf = NULL, *wbuf;
        struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
-       size_t size, v_len;
+       size_t size;
+       ssize_t v_len;
        int err = 0;
 
        ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
@@ -429,9 +437,9 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
                                       fp->stream.name,
                                       fp->stream.size,
                                       &stream_buf);
-       if ((int)v_len < 0) {
+       if (v_len < 0) {
                pr_err("not found stream in xattr : %zd\n", v_len);
-               err = (int)v_len;
+               err = v_len;
                goto out;
        }
 
@@ -1194,14 +1202,14 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
  * Return:     0 on success, otherwise error
  */
 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
-                              unsigned int flags, struct path *path,
-                              bool caseless)
+                              unsigned int flags, struct path *parent_path,
+                              struct path *path, bool caseless)
 {
        struct ksmbd_share_config *share_conf = work->tcon->share_conf;
        int err;
-       struct path parent_path;
 
-       err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, path);
+       err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, parent_path,
+                                          path);
        if (!err)
                return 0;
 
@@ -1216,10 +1224,10 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
                path_len = strlen(filepath);
                remain_len = path_len;
 
-               parent_path = share_conf->vfs_path;
-               path_get(&parent_path);
+               *parent_path = share_conf->vfs_path;
+               path_get(parent_path);
 
-               while (d_can_lookup(parent_path.dentry)) {
+               while (d_can_lookup(parent_path->dentry)) {
                        char *filename = filepath + path_len - remain_len;
                        char *next = strchrnul(filename, '/');
                        size_t filename_len = next - filename;
@@ -1228,7 +1236,7 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
                        if (filename_len == 0)
                                break;
 
-                       err = ksmbd_vfs_lookup_in_dir(&parent_path, filename,
+                       err = ksmbd_vfs_lookup_in_dir(parent_path, filename,
                                                      filename_len,
                                                      work->conn->um);
                        if (err)
@@ -1245,8 +1253,8 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
                                goto out2;
                        else if (is_last)
                                goto out1;
-                       path_put(&parent_path);
-                       parent_path = *path;
+                       path_put(parent_path);
+                       *parent_path = *path;
 
                        next[0] = '/';
                        remain_len -= filename_len + 1;
@@ -1254,16 +1262,17 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
 
                err = -EINVAL;
 out2:
-               path_put(&parent_path);
+               path_put(parent_path);
 out1:
                kfree(filepath);
        }
 
        if (!err) {
-               err = ksmbd_vfs_lock_parent(parent_path.dentry, path->dentry);
-               if (err)
-                       dput(path->dentry);
-               path_put(&parent_path);
+               err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
+               if (err) {
+                       path_put(path);
+                       path_put(parent_path);
+               }
        }
        return err;
 }
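
ksmbd_vfs_kern_path_locked() used to lock the parent internally and hand back only the child, so callers had to unlock via path.dentry->d_parent, which is not pinned and can be wrong by then. It now returns the locked, referenced parent explicitly, and every caller releases both. The calling convention:

    struct path path, parent_path;

    rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
                                    &parent_path, &path, 0);
    if (!rc) {
            /* ... operate on path.dentry under the parent's i_rwsem ... */
            inode_unlock(d_inode(parent_path.dentry));
            path_put(&path);
            path_put(&parent_path);
    }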
@@ -1650,7 +1659,8 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
        u64 time;
        int rc;
 
-       generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat);
+       generic_fillattr(idmap, STATX_BASIC_STATS, d_inode(dentry),
+                        ksmbd_kstat->kstat);
 
        time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
        ksmbd_kstat->create_time = time;
index 8003931..72f9fb4 100644 (file)
@@ -115,8 +115,8 @@ int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
 int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
                           const struct path *path, char *attr_name);
 int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
-                              unsigned int flags, struct path *path,
-                              bool caseless);
+                              unsigned int flags, struct path *parent_path,
+                              struct path *path, bool caseless);
 struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
                                          const char *name,
                                          unsigned int flags,
index 004eb1c..0263101 100644 (file)
@@ -120,17 +120,17 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
 static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
                                       struct pipe_buffer *buf)
 {
-       struct page *page = buf->page;
+       struct folio *folio = page_folio(buf->page);
        int err;
 
-       if (!PageUptodate(page)) {
-               lock_page(page);
+       if (!folio_test_uptodate(folio)) {
+               folio_lock(folio);
 
                /*
-                * Page got truncated/unhashed. This will cause a 0-byte
+                * Folio got truncated/unhashed. This will cause a 0-byte
                 * splice, if this is the first page.
                 */
-               if (!page->mapping) {
+               if (!folio->mapping) {
                        err = -ENODATA;
                        goto error;
                }
@@ -138,20 +138,18 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
                /*
                 * Uh oh, read-error from disk.
                 */
-               if (!PageUptodate(page)) {
+               if (!folio_test_uptodate(folio)) {
                        err = -EIO;
                        goto error;
                }
 
-               /*
-                * Page is ok afterall, we are done.
-                */
-               unlock_page(page);
+               /* Folio is ok after all, we are done */
+               folio_unlock(folio);
        }
 
        return 0;
 error:
-       unlock_page(page);
+       folio_unlock(folio);
        return err;
 }
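
page_cache_pipe_buf_confirm() gets the mechanical page-to-folio treatment: derive the folio once with page_folio() and use folio operations throughout, which also saves the repeated compound-head lookups hidden in the page-based macros. The mapping:

    struct folio *folio = page_folio(buf->page);

    if (!folio_test_uptodate(folio)) {      /* was: !PageUptodate(page) */
            folio_lock(folio);              /* was: lock_page(page) */
            if (!folio->mapping)            /* truncated/unhashed meanwhile */
                    return -ENODATA;
            folio_unlock(folio);            /* was: unlock_page(page) */
    }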
 
@@ -876,6 +874,8 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
                        msg.msg_flags |= MSG_MORE;
                if (remain && pipe_occupancy(pipe->head, tail) > 0)
                        msg.msg_flags |= MSG_MORE;
+               if (out->f_flags & O_NONBLOCK)
+                       msg.msg_flags |= MSG_DONTWAIT;
 
                iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc,
                              len - remain);
@@ -1267,10 +1267,8 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
                if ((in->f_flags | out->f_flags) & O_NONBLOCK)
                        flags |= SPLICE_F_NONBLOCK;
 
-               return splice_pipe_to_pipe(ipipe, opipe, len, flags);
-       }
-
-       if (ipipe) {
+               ret = splice_pipe_to_pipe(ipipe, opipe, len, flags);
+       } else if (ipipe) {
                if (off_in)
                        return -ESPIPE;
                if (off_out) {
@@ -1295,18 +1293,11 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
                ret = do_splice_from(ipipe, out, &offset, len, flags);
                file_end_write(out);
 
-               if (ret > 0)
-                       fsnotify_modify(out);
-
                if (!off_out)
                        out->f_pos = offset;
                else
                        *off_out = offset;
-
-               return ret;
-       }
-
-       if (opipe) {
+       } else if (opipe) {
                if (off_out)
                        return -ESPIPE;
                if (off_in) {
@@ -1322,18 +1313,25 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
 
                ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
 
-               if (ret > 0)
-                       fsnotify_access(in);
-
                if (!off_in)
                        in->f_pos = offset;
                else
                        *off_in = offset;
+       } else {
+               ret = -EINVAL;
+       }
 
-               return ret;
+       if (ret > 0) {
+               /*
+                * Generate modify out before access in:
+                * do_splice_from() may've already sent modify out,
+                * and this ensures the events get merged.
+                */
+               fsnotify_modify(out);
+               fsnotify_access(in);
        }
 
-       return -EINVAL;
+       return ret;
 }
 
 static long __do_splice(struct file *in, loff_t __user *off_in,
@@ -1462,6 +1460,9 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
                pipe_unlock(pipe);
        }
 
+       if (ret > 0)
+               fsnotify_access(file);
+
        return ret;
 }
 
@@ -1491,8 +1492,10 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
        if (!ret)
                ret = iter_to_pipe(iter, pipe, buf_flag);
        pipe_unlock(pipe);
-       if (ret > 0)
+       if (ret > 0) {
                wakeup_pipe_readers(pipe);
+               fsnotify_modify(file);
+       }
        return ret;
 }
 
@@ -1926,6 +1929,11 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
                }
        }
 
+       if (ret > 0) {
+               fsnotify_access(in);
+               fsnotify_modify(out);
+       }
+
        return ret;
 }
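
Taken together, the splice.c hunks above consolidate fsnotify generation on the common exit paths: do_splice() now emits the modify event on the output before the access event on the input (so events from do_splice_from() merge), and vmsplice() and tee() now generate access/modify events as well. A hedged userspace sketch for observing this with inotify (path handling and formatting are illustrative):

	#include <stdio.h>
	#include <sys/inotify.h>
	#include <unistd.h>

	/* Print ACCESS/MODIFY events for @path, e.g. while another
	 * process splices or tees to/from it. */
	static int watch_file(const char *path)
	{
		char buf[4096];
		ssize_t len;
		int fd = inotify_init1(0);

		if (fd < 0 || inotify_add_watch(fd, path,
						IN_ACCESS | IN_MODIFY) < 0)
			return -1;

		while ((len = read(fd, buf, sizeof(buf))) > 0) {
			struct inotify_event *ev;

			for (char *p = buf; p < buf + len;
			     p += sizeof(*ev) + ev->len) {
				ev = (struct inotify_event *)p;
				printf("%s\n", ev->mask & IN_MODIFY ?
				       "MODIFY" : "ACCESS");
			}
		}
		return 0;
	}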
 
index 2446314..c6e626b 100644 (file)
@@ -61,7 +61,7 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
        inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
        inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
        inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
-       inode->i_ctime.tv_sec = inode->i_mtime.tv_sec;
+       inode_set_ctime(inode, inode->i_mtime.tv_sec, 0);
        inode->i_mode = le16_to_cpu(sqsh_ino->mode);
        inode->i_size = 0;
 
index c983092..b5e01bd 100644 (file)
@@ -68,7 +68,7 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
        dest->i_rdev = src->i_rdev;
        dest->i_atime = src->i_atime;
        dest->i_mtime = src->i_mtime;
-       dest->i_ctime = src->i_ctime;
+       inode_set_ctime_to_ts(dest, inode_get_ctime(src));
        dest->i_blkbits = src->i_blkbits;
        dest->i_flags = src->i_flags;
        set_nlink(dest, src->i_nlink);
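
The squashfs and fs/stack.c hunks above (and the sysv, tracefs and ubifs ones further down) are part of the tree-wide switch to the ctime accessor helpers, which funnel every i_ctime read and write through one place so multigrain timestamps can hook in. A minimal sketch of the conversion idioms, using a hypothetical filesystem helper:

	#include <linux/fs.h>

	static void example_touch(struct inode *dir, struct inode *inode)
	{
		/* was: dir->i_mtime = dir->i_ctime = current_time(dir); */
		dir->i_mtime = inode_set_ctime_current(dir);

		/* was: inode->i_ctime = dir->i_ctime; */
		inode_set_ctime_to_ts(inode, inode_get_ctime(dir));

		/* was: inode->i_ctime.tv_sec = secs; ...tv_nsec = 0;
		 * e.g. when reading timestamps off disk */
		inode_set_ctime(inode, /* secs */ 0, /* nsecs */ 0);
	}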
index 7c238da..136711a 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
 #include "mount.h"
 
 /**
+ * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED
+ * @stat: where to store the resulting values
+ * @request_mask: STATX_* values requested
+ * @inode: inode from which to grab the c/mtime
+ *
+ * Given @inode, grab the ctime and mtime out of it and store the result
+ * in @stat. When fetching the value, flag it as queried so the next write
+ * will use a fine-grained timestamp.
+ */
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
+{
+       atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec;
+
+       /* If neither time was requested, then don't report them */
+       if (!(request_mask & (STATX_CTIME|STATX_MTIME))) {
+               stat->result_mask &= ~(STATX_CTIME|STATX_MTIME);
+               return;
+       }
+
+       stat->mtime = inode->i_mtime;
+       stat->ctime.tv_sec = inode->__i_ctime.tv_sec;
+       /*
+        * Atomically set the QUERIED flag and fetch the new value with
+        * the flag masked off.
+        */
+       stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) &
+                                       ~I_CTIME_QUERIED;
+}
+EXPORT_SYMBOL(fill_mg_cmtime);
+
+/**
  * generic_fillattr - Fill in the basic attributes from the inode struct
- * @idmap:     idmap of the mount the inode was found from
- * @inode:     Inode to use as the source
- * @stat:      Where to fill in the attributes
+ * @idmap:             idmap of the mount the inode was found from
+ * @request_mask:      statx request_mask
+ * @inode:             Inode to use as the source
+ * @stat:              Where to fill in the attributes
  *
  * Fill in the basic attributes in the kstat structure from data that's to be
  * found on the VFS inode structure.  This is the default if no getattr inode
@@ -42,8 +74,8 @@
  * uid and gid fields. On non-idmapped mounts or if permission checking is to be
  * performed on the raw inode simply pass @nop_mnt_idmap.
  */
-void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
-                     struct kstat *stat)
+void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
+                     struct inode *inode, struct kstat *stat)
 {
        vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
        vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
@@ -57,10 +89,22 @@ void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
        stat->rdev = inode->i_rdev;
        stat->size = i_size_read(inode);
        stat->atime = inode->i_atime;
-       stat->mtime = inode->i_mtime;
-       stat->ctime = inode->i_ctime;
+
+       if (is_mgtime(inode)) {
+               fill_mg_cmtime(stat, request_mask, inode);
+       } else {
+               stat->mtime = inode->i_mtime;
+               stat->ctime = inode_get_ctime(inode);
+       }
+
        stat->blksize = i_blocksize(inode);
        stat->blocks = inode->i_blocks;
+
+       if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
+               stat->result_mask |= STATX_CHANGE_COOKIE;
+               stat->change_cookie = inode_query_iversion(inode);
+       }
+
 }
 EXPORT_SYMBOL(generic_fillattr);
 
@@ -123,17 +167,12 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
        stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
                                  STATX_ATTR_DAX);
 
-       if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
-               stat->result_mask |= STATX_CHANGE_COOKIE;
-               stat->change_cookie = inode_query_iversion(inode);
-       }
-
        idmap = mnt_idmap(path->mnt);
        if (inode->i_op->getattr)
                return inode->i_op->getattr(idmap, path, stat,
                                            request_mask, query_flags);
 
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
        return 0;
 }
 EXPORT_SYMBOL(vfs_getattr_nosec);
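
fill_mg_cmtime() works because tv_nsec never needs its high bits (nanoseconds stay below 10^9), so I_CTIME_QUERIED can live in the same word: one atomic_long_fetch_or() marks the timestamp as queried and hands back the flag-free value in a single step. A hedged userspace analogue of the trick with C11 atomics (names and the flag bit are hypothetical):

	#include <stdatomic.h>

	#define QUERIED		(1UL << 31)	/* above any valid tv_nsec */

	static atomic_ulong ctime_nsec;

	/* Reader: fetch tv_nsec and atomically flag it as queried. */
	static unsigned long query_nsec(void)
	{
		return atomic_fetch_or(&ctime_nsec, QUERIED) & ~QUERIED;
	}

	/* Writer: only pay for a fine-grained stamp if someone looked;
	 * storing the new value also clears the QUERIED bit. */
	static void update_nsec(unsigned long coarse, unsigned long fine)
	{
		unsigned long old = atomic_load(&ctime_nsec);

		atomic_store(&ctime_nsec, (old & QUERIED) ? fine : coarse);
	}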
index e781226..692654c 100644 (file)
@@ -39,7 +39,7 @@
 #include <uapi/linux/mount.h>
 #include "internal.h"
 
-static int thaw_super_locked(struct super_block *sb);
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);
 
 static LIST_HEAD(super_blocks);
 static DEFINE_SPINLOCK(sb_lock);
@@ -50,6 +50,130 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
        "sb_internal",
 };
 
+static inline void __super_lock(struct super_block *sb, bool excl)
+{
+       if (excl)
+               down_write(&sb->s_umount);
+       else
+               down_read(&sb->s_umount);
+}
+
+static inline void super_unlock(struct super_block *sb, bool excl)
+{
+       if (excl)
+               up_write(&sb->s_umount);
+       else
+               up_read(&sb->s_umount);
+}
+
+static inline void __super_lock_excl(struct super_block *sb)
+{
+       __super_lock(sb, true);
+}
+
+static inline void super_unlock_excl(struct super_block *sb)
+{
+       super_unlock(sb, true);
+}
+
+static inline void super_unlock_shared(struct super_block *sb)
+{
+       super_unlock(sb, false);
+}
+
+static inline bool wait_born(struct super_block *sb)
+{
+       unsigned int flags;
+
+       /*
+        * Pairs with smp_store_release() in super_wake() and ensures
+        * that we see SB_BORN or SB_DYING after we're woken.
+        */
+       flags = smp_load_acquire(&sb->s_flags);
+       return flags & (SB_BORN | SB_DYING);
+}
+
+/**
+ * super_lock - wait for superblock to become ready and lock it
+ * @sb: superblock to wait for
+ * @excl: whether exclusive access is required
+ *
+ * If the superblock has neither passed through vfs_get_tree() nor
+ * generic_shutdown_super() yet, wait for it to happen. Either superblock
+ * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
+ * woken and we'll see SB_DYING.
+ *
+ * The caller must have acquired a temporary reference on @sb->s_count.
+ *
+ * Return: This returns true if SB_BORN was set, false if SB_DYING was
+ *         set. The function acquires s_umount and returns with it held.
+ */
+static __must_check bool super_lock(struct super_block *sb, bool excl)
+{
+
+       lockdep_assert_not_held(&sb->s_umount);
+
+relock:
+       __super_lock(sb, excl);
+
+       /*
+        * Has gone through generic_shutdown_super() in the meantime.
+        * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
+        * grab a reference to this. Tell them so.
+        */
+       if (sb->s_flags & SB_DYING)
+               return false;
+
+       /* Has called ->get_tree() successfully. */
+       if (sb->s_flags & SB_BORN)
+               return true;
+
+       super_unlock(sb, excl);
+
+       /* wait until the superblock is ready or dying */
+       wait_var_event(&sb->s_flags, wait_born(sb));
+
+       /*
+        * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
+        * Just reacquire @sb->s_umount for the caller.
+        */
+       goto relock;
+}
+
+/* wait and acquire read-side of @sb->s_umount */
+static inline bool super_lock_shared(struct super_block *sb)
+{
+       return super_lock(sb, false);
+}
+
+/* wait and acquire write-side of @sb->s_umount */
+static inline bool super_lock_excl(struct super_block *sb)
+{
+       return super_lock(sb, true);
+}
+
+/* wake waiters */
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
+static void super_wake(struct super_block *sb, unsigned int flag)
+{
+       WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
+       WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1);
+
+       /*
+        * Pairs with smp_load_acquire() in super_lock() to make sure
+        * all initializations in the superblock are visible to anyone
+        * who observes SB_BORN being set.
+        */
+       smp_store_release(&sb->s_flags, sb->s_flags | flag);
+       /*
+        * Pairs with the barrier in prepare_to_wait_event() to make sure
+        * ___wait_var_event() either sees SB_BORN set or
+        * waitqueue_active() check in wake_up_var() sees the waiter.
+        */
+       smp_mb();
+       wake_up_var(&sb->s_flags);
+}
+
 /*
  * One thing we have to be careful of with a per-sb shrinker is that we don't
  * drop the last active reference to the superblock from within the shrinker.
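
super_wake() and super_lock() above implement a small publish-then-wake handshake: the smp_store_release() publishes SB_BORN/SB_DYING only after all superblock initialisation, the smp_load_acquire() in wait_born() pairs with it, and the smp_mb() before wake_up_var() closes the race with a waiter that has already sampled the flags. A hedged userspace analogue with C11 atomics, spinning where the kernel would sleep in wait_var_event():

	#include <stdatomic.h>
	#include <stdbool.h>

	#define SB_BORN		0x1u
	#define SB_DYING	0x2u

	static _Atomic unsigned int s_flags;

	/* Publisher: everything written before the release store is
	 * visible to whoever observes the flag. */
	static void super_wake_sketch(unsigned int flag)
	{
		atomic_fetch_or_explicit(&s_flags, flag,
					 memory_order_release);
		/* kernel: smp_mb(); wake_up_var(&sb->s_flags); */
	}

	/* Waiter: the acquire load pairs with the release store above. */
	static bool wait_born_sketch(void)
	{
		unsigned int flags;

		while (!((flags = atomic_load_explicit(&s_flags,
					memory_order_acquire)) &
			 (SB_BORN | SB_DYING)))
			;	/* kernel sleeps in wait_var_event() */
		return flags & SB_BORN;
	}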
@@ -76,7 +200,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
        if (!(sc->gfp_mask & __GFP_FS))
                return SHRINK_STOP;
 
-       if (!trylock_super(sb))
+       if (!super_trylock_shared(sb))
                return SHRINK_STOP;
 
        if (sb->s_op->nr_cached_objects)
@@ -110,7 +234,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
                freed += sb->s_op->free_cached_objects(sb, sc);
        }
 
-       up_read(&sb->s_umount);
+       super_unlock_shared(sb);
        return freed;
 }
 
@@ -123,17 +247,17 @@ static unsigned long super_cache_count(struct shrinker *shrink,
        sb = container_of(shrink, struct super_block, s_shrink);
 
        /*
-        * We don't call trylock_super() here as it is a scalability bottleneck,
-        * so we're exposed to partial setup state. The shrinker rwsem does not
-        * protect filesystem operations backing list_lru_shrink_count() or
-        * s_op->nr_cached_objects(). Counts can change between
-        * super_cache_count and super_cache_scan, so we really don't need locks
-        * here.
+        * We don't call super_trylock_shared() here as it is a scalability
+        * bottleneck, so we're exposed to partial setup state. The shrinker
+        * rwsem does not protect filesystem operations backing
+        * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can
+        * change between super_cache_count and super_cache_scan, so we really
+        * don't need locks here.
         *
         * However, if we are currently mounting the superblock, the underlying
         * filesystem might be in a state of partial construction and hence it
-        * is dangerous to access it.  trylock_super() uses a SB_BORN check to
-        * avoid this situation, so do the same here. The memory barrier is
+        * is dangerous to access it.  super_trylock_shared() uses a SB_BORN check
+        * to avoid this situation, so do the same here. The memory barrier is
         * matched with the one in mount_fs() as we don't hold locks here.
         */
        if (!(sb->s_flags & SB_BORN))
@@ -176,7 +300,7 @@ static void destroy_unused_super(struct super_block *s)
 {
        if (!s)
                return;
-       up_write(&s->s_umount);
+       super_unlock_excl(s);
        list_lru_destroy(&s->s_dentry_lru);
        list_lru_destroy(&s->s_inode_lru);
        security_sb_free(s);
@@ -337,10 +461,29 @@ void deactivate_locked_super(struct super_block *s)
                list_lru_destroy(&s->s_dentry_lru);
                list_lru_destroy(&s->s_inode_lru);
 
+               /*
+                * Remove it from @fs_supers so it isn't found by new
+                * sget{_fc}() walkers anymore. Any concurrent mounter still
+                * managing to grab a temporary reference is guaranteed to
+                * already see SB_DYING and will wait until we notify them about
+                * SB_DEAD.
+                */
+               spin_lock(&sb_lock);
+               hlist_del_init(&s->s_instances);
+               spin_unlock(&sb_lock);
+
+               /*
+                * Let concurrent mounts know that this thing is really dead.
+                * We don't need @sb->s_umount here as every concurrent caller
+                * will see SB_DYING and either discard the superblock or wait
+                * for SB_DEAD.
+                */
+               super_wake(s, SB_DEAD);
+
                put_filesystem(fs);
                put_super(s);
        } else {
-               up_write(&s->s_umount);
+               super_unlock_excl(s);
        }
 }
 
@@ -357,7 +500,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
 void deactivate_super(struct super_block *s)
 {
        if (!atomic_add_unless(&s->s_active, -1, 1)) {
-               down_write(&s->s_umount);
+               __super_lock_excl(s);
                deactivate_locked_super(s);
        }
 }
@@ -379,20 +522,61 @@ EXPORT_SYMBOL(deactivate_super);
  */
 static int grab_super(struct super_block *s) __releases(sb_lock)
 {
+       bool born;
+
        s->s_count++;
        spin_unlock(&sb_lock);
-       down_write(&s->s_umount);
-       if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
+       born = super_lock_excl(s);
+       if (born && atomic_inc_not_zero(&s->s_active)) {
                put_super(s);
                return 1;
        }
-       up_write(&s->s_umount);
+       super_unlock_excl(s);
        put_super(s);
        return 0;
 }
 
+static inline bool wait_dead(struct super_block *sb)
+{
+       unsigned int flags;
+
+       /*
+        * Pairs with memory barrier in super_wake() and ensures
+        * that we see SB_DEAD after we're woken.
+        */
+       flags = smp_load_acquire(&sb->s_flags);
+       return flags & SB_DEAD;
+}
+
+/**
+ * grab_super_dead - acquire an active reference to a superblock
+ * @sb: superblock to acquire
+ *
+ * Acquire a temporary reference on a superblock and try to trade it for
+ * an active reference. This is used in sget{_fc}() to wait for a
+ * superblock to either become SB_BORN or for it to pass through
+ * sb->kill() and be marked as SB_DEAD.
+ *
+ * Return: This returns true if an active reference could be acquired,
+ *         false if not.
+ */
+static bool grab_super_dead(struct super_block *sb)
+{
+
+       sb->s_count++;
+       if (grab_super(sb)) {
+               put_super(sb);
+               lockdep_assert_held(&sb->s_umount);
+               return true;
+       }
+       wait_var_event(&sb->s_flags, wait_dead(sb));
+       put_super(sb);
+       lockdep_assert_not_held(&sb->s_umount);
+       return false;
+}
+
 /*
- *     trylock_super - try to grab ->s_umount shared
+ *     super_trylock_shared - try to grab ->s_umount shared
  *     @sb: reference we are trying to grab
  *
  *     Try to prevent fs shutdown.  This is used in places where we
@@ -408,13 +592,13 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
  *     of down_read().  There's a couple of places that are OK with that, but
  *     it's very much not a general-purpose interface.
  */
-bool trylock_super(struct super_block *sb)
+bool super_trylock_shared(struct super_block *sb)
 {
        if (down_read_trylock(&sb->s_umount)) {
-               if (!hlist_unhashed(&sb->s_instances) &&
-                   sb->s_root && (sb->s_flags & SB_BORN))
+               if (!(sb->s_flags & SB_DYING) && sb->s_root &&
+                   (sb->s_flags & SB_BORN))
                        return true;
-               up_read(&sb->s_umount);
+               super_unlock_shared(sb);
        }
 
        return false;
@@ -439,13 +623,13 @@ bool trylock_super(struct super_block *sb)
 void retire_super(struct super_block *sb)
 {
        WARN_ON(!sb->s_bdev);
-       down_write(&sb->s_umount);
+       __super_lock_excl(sb);
        if (sb->s_iflags & SB_I_PERSB_BDI) {
                bdi_unregister(sb->s_bdi);
                sb->s_iflags &= ~SB_I_PERSB_BDI;
        }
        sb->s_iflags |= SB_I_RETIRED;
-       up_write(&sb->s_umount);
+       super_unlock_excl(sb);
 }
 EXPORT_SYMBOL(retire_super);
 
@@ -517,11 +701,17 @@ void generic_shutdown_super(struct super_block *sb)
                        spin_unlock(&sb->s_inode_list_lock);
                }
        }
-       spin_lock(&sb_lock);
-       /* should be initialized for __put_super_and_need_restart() */
-       hlist_del_init(&sb->s_instances);
-       spin_unlock(&sb_lock);
-       up_write(&sb->s_umount);
+       /*
+        * Broadcast to everyone that grabbed a temporary reference to this
+        * superblock before we removed it from @fs_supers that the superblock
+        * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
+        * discard this superblock and treat it as dead.
+        *
+        * We leave the superblock on @fs_supers so it can be found by
+        * sget{_fc}() until we have passed sb->kill_sb().
+        */
+       super_wake(sb, SB_DYING);
+       super_unlock_excl(sb);
        if (sb->s_bdi != &noop_backing_dev_info) {
                if (sb->s_iflags & SB_I_PERSB_BDI)
                        bdi_unregister(sb->s_bdi);
@@ -546,17 +736,31 @@ bool mount_capable(struct fs_context *fc)
  * @test: Comparison callback
  * @set: Setup callback
  *
- * Find or create a superblock using the parameters stored in the filesystem
- * context and the two callback functions.
+ * Create a new superblock or find an existing one.
+ *
+ * The @test callback is used to find a matching existing superblock.
+ * Whether or not the requested parameters in @fc are taken into account
+ * is specific to the @test callback that is used. They may even be
+ * completely ignored.
+ *
+ * If an extant superblock is matched, it will be returned unless:
  *
- * If an extant superblock is matched, then that will be returned with an
- * elevated reference count that the caller must transfer or discard.
+ * (1) the namespace the filesystem context @fc and the extant
+ *     superblock's namespace differ
+ *
+ * (2) the filesystem context @fc has requested that reusing an extant
+ *     superblock is not allowed
+ *
+ * In both cases EBUSY will be returned.
  *
  * If no match is made, a new superblock will be allocated and basic
- * initialisation will be performed (s_type, s_fs_info and s_id will be set and
- * the set() callback will be invoked), the superblock will be published and it
- * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
- * as yet unset.
+ * initialisation will be performed (s_type, s_fs_info and s_id will be
+ * set and the @set callback will be invoked), the superblock will be
+ * published and it will be returned in a partially constructed state
+ * with SB_BORN and SB_ACTIVE as yet unset.
+ *
+ * Return: On success, an extant or newly created superblock is
+ *         returned. On failure an error pointer is returned.
  */
 struct super_block *sget_fc(struct fs_context *fc,
                            int (*test)(struct super_block *, struct fs_context *),
@@ -595,6 +799,11 @@ retry:
        s->s_type = fc->fs_type;
        s->s_iflags |= fc->s_iflags;
        strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+       /*
+        * Make the superblock visible on @super_blocks and @fs_supers.
+        * It's in a nascent state and users should wait on SB_BORN or
+        * SB_DYING to be set.
+        */
        list_add_tail(&s->s_list, &super_blocks);
        hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
        spin_unlock(&sb_lock);
@@ -603,12 +812,16 @@ retry:
        return s;
 
 share_extant_sb:
-       if (user_ns != old->s_user_ns) {
+       if (user_ns != old->s_user_ns || fc->exclusive) {
                spin_unlock(&sb_lock);
                destroy_unused_super(s);
+               if (fc->exclusive)
+                       warnfc(fc, "reusing existing filesystem not allowed");
+               else
+                       warnfc(fc, "reusing existing filesystem in another namespace not allowed");
                return ERR_PTR(-EBUSY);
        }
-       if (!grab_super(old))
+       if (!grab_super_dead(old))
                goto retry;
        destroy_unused_super(s);
        return old;
@@ -652,7 +865,7 @@ retry:
                                destroy_unused_super(s);
                                return ERR_PTR(-EBUSY);
                        }
-                       if (!grab_super(old))
+                       if (!grab_super_dead(old))
                                goto retry;
                        destroy_unused_super(s);
                        return old;
@@ -685,7 +898,7 @@ EXPORT_SYMBOL(sget);
 
 void drop_super(struct super_block *sb)
 {
-       up_read(&sb->s_umount);
+       super_unlock_shared(sb);
        put_super(sb);
 }
 
@@ -693,7 +906,7 @@ EXPORT_SYMBOL(drop_super);
 
 void drop_super_exclusive(struct super_block *sb)
 {
-       up_write(&sb->s_umount);
+       super_unlock_excl(sb);
        put_super(sb);
 }
 EXPORT_SYMBOL(drop_super_exclusive);
@@ -704,7 +917,8 @@ static void __iterate_supers(void (*f)(struct super_block *))
 
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-               if (hlist_unhashed(&sb->s_instances))
+               /* Pairs with memory barrier in super_wake(). */
+               if (smp_load_acquire(&sb->s_flags) & SB_DYING)
                        continue;
                sb->s_count++;
                spin_unlock(&sb_lock);
@@ -734,15 +948,15 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
 
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-               if (hlist_unhashed(&sb->s_instances))
-                       continue;
+               bool born;
+
                sb->s_count++;
                spin_unlock(&sb_lock);
 
-               down_read(&sb->s_umount);
-               if (sb->s_root && (sb->s_flags & SB_BORN))
+               born = super_lock_shared(sb);
+               if (born && sb->s_root)
                        f(sb, arg);
-               up_read(&sb->s_umount);
+               super_unlock_shared(sb);
 
                spin_lock(&sb_lock);
                if (p)
@@ -770,13 +984,15 @@ void iterate_supers_type(struct file_system_type *type,
 
        spin_lock(&sb_lock);
        hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+               bool born;
+
                sb->s_count++;
                spin_unlock(&sb_lock);
 
-               down_read(&sb->s_umount);
-               if (sb->s_root && (sb->s_flags & SB_BORN))
+               born = super_lock_shared(sb);
+               if (born && sb->s_root)
                        f(sb, arg);
-               up_read(&sb->s_umount);
+               super_unlock_shared(sb);
 
                spin_lock(&sb_lock);
                if (p)
@@ -791,43 +1007,6 @@ void iterate_supers_type(struct file_system_type *type,
 EXPORT_SYMBOL(iterate_supers_type);
 
 /**
- * get_super - get the superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device given. %NULL is returned if no match is found.
- */
-struct super_block *get_super(struct block_device *bdev)
-{
-       struct super_block *sb;
-
-       if (!bdev)
-               return NULL;
-
-       spin_lock(&sb_lock);
-rescan:
-       list_for_each_entry(sb, &super_blocks, s_list) {
-               if (hlist_unhashed(&sb->s_instances))
-                       continue;
-               if (sb->s_bdev == bdev) {
-                       sb->s_count++;
-                       spin_unlock(&sb_lock);
-                       down_read(&sb->s_umount);
-                       /* still alive? */
-                       if (sb->s_root && (sb->s_flags & SB_BORN))
-                               return sb;
-                       up_read(&sb->s_umount);
-                       /* nope, got unmounted */
-                       spin_lock(&sb_lock);
-                       __put_super(sb);
-                       goto rescan;
-               }
-       }
-       spin_unlock(&sb_lock);
-       return NULL;
-}
-
-/**
  * get_active_super - get an active reference to the superblock of a device
  * @bdev: device to get the superblock for
  *
@@ -842,15 +1021,12 @@ struct super_block *get_active_super(struct block_device *bdev)
        if (!bdev)
                return NULL;
 
-restart:
        spin_lock(&sb_lock);
        list_for_each_entry(sb, &super_blocks, s_list) {
-               if (hlist_unhashed(&sb->s_instances))
-                       continue;
                if (sb->s_bdev == bdev) {
                        if (!grab_super(sb))
-                               goto restart;
-                       up_write(&sb->s_umount);
+                               return NULL;
+                       super_unlock_excl(sb);
                        return sb;
                }
        }
@@ -863,28 +1039,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
        struct super_block *sb;
 
        spin_lock(&sb_lock);
-rescan:
        list_for_each_entry(sb, &super_blocks, s_list) {
-               if (hlist_unhashed(&sb->s_instances))
-                       continue;
                if (sb->s_dev ==  dev) {
+                       bool born;
+
                        sb->s_count++;
                        spin_unlock(&sb_lock);
-                       if (excl)
-                               down_write(&sb->s_umount);
-                       else
-                               down_read(&sb->s_umount);
                        /* still alive? */
-                       if (sb->s_root && (sb->s_flags & SB_BORN))
+                       born = super_lock(sb, excl);
+                       if (born && sb->s_root)
                                return sb;
-                       if (excl)
-                               up_write(&sb->s_umount);
-                       else
-                               up_read(&sb->s_umount);
+                       super_unlock(sb, excl);
                        /* nope, got unmounted */
                        spin_lock(&sb_lock);
                        __put_super(sb);
-                       goto rescan;
+                       break;
                }
        }
        spin_unlock(&sb_lock);
@@ -926,9 +1095,9 @@ int reconfigure_super(struct fs_context *fc)
 
        if (remount_ro) {
                if (!hlist_empty(&sb->s_pins)) {
-                       up_write(&sb->s_umount);
+                       super_unlock_excl(sb);
                        group_pin_kill(&sb->s_pins);
-                       down_write(&sb->s_umount);
+                       __super_lock_excl(sb);
                        if (!sb->s_root)
                                return 0;
                        if (sb->s_writers.frozen != SB_UNFROZEN)
@@ -991,9 +1160,9 @@ cancel_readonly:
 
 static void do_emergency_remount_callback(struct super_block *sb)
 {
-       down_write(&sb->s_umount);
-       if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
-           !sb_rdonly(sb)) {
+       bool born = super_lock_excl(sb);
+
+       if (born && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
                struct fs_context *fc;
 
                fc = fs_context_for_reconfigure(sb->s_root,
@@ -1004,7 +1173,7 @@ static void do_emergency_remount_callback(struct super_block *sb)
                        put_fs_context(fc);
                }
        }
-       up_write(&sb->s_umount);
+       super_unlock_excl(sb);
 }
 
 static void do_emergency_remount(struct work_struct *work)
@@ -1027,12 +1196,13 @@ void emergency_remount(void)
 
 static void do_thaw_all_callback(struct super_block *sb)
 {
-       down_write(&sb->s_umount);
-       if (sb->s_root && sb->s_flags & SB_BORN) {
+       bool born = super_lock_excl(sb);
+
+       if (born && sb->s_root) {
                emergency_thaw_bdev(sb);
-               thaw_super_locked(sb);
+               thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE);
        } else {
-               up_write(&sb->s_umount);
+               super_unlock_excl(sb);
        }
 }
 
@@ -1136,7 +1306,7 @@ static int test_single_super(struct super_block *s, struct fs_context *fc)
        return 1;
 }
 
-static int vfs_get_super(struct fs_context *fc, bool reconf,
+static int vfs_get_super(struct fs_context *fc,
                int (*test)(struct super_block *, struct fs_context *),
                int (*fill_super)(struct super_block *sb,
                                  struct fs_context *fc))
@@ -1154,19 +1324,9 @@ static int vfs_get_super(struct fs_context *fc, bool reconf,
                        goto error;
 
                sb->s_flags |= SB_ACTIVE;
-               fc->root = dget(sb->s_root);
-       } else {
-               fc->root = dget(sb->s_root);
-               if (reconf) {
-                       err = reconfigure_super(fc);
-                       if (err < 0) {
-                               dput(fc->root);
-                               fc->root = NULL;
-                               goto error;
-                       }
-               }
        }
 
+       fc->root = dget(sb->s_root);
        return 0;
 
 error:
@@ -1178,7 +1338,7 @@ int get_tree_nodev(struct fs_context *fc,
                  int (*fill_super)(struct super_block *sb,
                                    struct fs_context *fc))
 {
-       return vfs_get_super(fc, false, NULL, fill_super);
+       return vfs_get_super(fc, NULL, fill_super);
 }
 EXPORT_SYMBOL(get_tree_nodev);
 
@@ -1186,54 +1346,81 @@ int get_tree_single(struct fs_context *fc,
                  int (*fill_super)(struct super_block *sb,
                                    struct fs_context *fc))
 {
-       return vfs_get_super(fc, false, test_single_super, fill_super);
+       return vfs_get_super(fc, test_single_super, fill_super);
 }
 EXPORT_SYMBOL(get_tree_single);
 
-int get_tree_single_reconf(struct fs_context *fc,
-                 int (*fill_super)(struct super_block *sb,
-                                   struct fs_context *fc))
-{
-       return vfs_get_super(fc, true, test_single_super, fill_super);
-}
-EXPORT_SYMBOL(get_tree_single_reconf);
-
 int get_tree_keyed(struct fs_context *fc,
                  int (*fill_super)(struct super_block *sb,
                                    struct fs_context *fc),
                void *key)
 {
        fc->s_fs_info = key;
-       return vfs_get_super(fc, false, test_keyed_super, fill_super);
+       return vfs_get_super(fc, test_keyed_super, fill_super);
 }
 EXPORT_SYMBOL(get_tree_keyed);
 
 #ifdef CONFIG_BLOCK
-static void fs_mark_dead(struct block_device *bdev)
+/*
+ * Lock a super block that the caller holds a reference to.
+ *
+ * The caller needs to ensure that the super_block isn't being freed while
+ * calling this function, e.g. by holding a lock over the call to this function
+ * and the place that clears the pointer to the superblock used by this function
+ * before freeing the superblock.
+ */
+static bool super_lock_shared_active(struct super_block *sb)
 {
-       struct super_block *sb;
+       bool born = super_lock_shared(sb);
+
+       if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
+               super_unlock_shared(sb);
+               return false;
+       }
+       return true;
+}
+
+static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
+{
+       struct super_block *sb = bdev->bd_holder;
+
+       /* bd_holder_lock ensures that the sb isn't freed */
+       lockdep_assert_held(&bdev->bd_holder_lock);
 
-       sb = get_super(bdev);
-       if (!sb)
+       if (!super_lock_shared_active(sb))
                return;
 
+       if (!surprise)
+               sync_filesystem(sb);
+       shrink_dcache_sb(sb);
+       invalidate_inodes(sb);
        if (sb->s_op->shutdown)
                sb->s_op->shutdown(sb);
-       drop_super(sb);
+
+       super_unlock_shared(sb);
+}
+
+static void fs_bdev_sync(struct block_device *bdev)
+{
+       struct super_block *sb = bdev->bd_holder;
+
+       lockdep_assert_held(&bdev->bd_holder_lock);
+
+       if (!super_lock_shared_active(sb))
+               return;
+       sync_filesystem(sb);
+       super_unlock_shared(sb);
 }
 
-static const struct blk_holder_ops fs_holder_ops = {
-       .mark_dead              = fs_mark_dead,
+const struct blk_holder_ops fs_holder_ops = {
+       .mark_dead              = fs_bdev_mark_dead,
+       .sync                   = fs_bdev_sync,
 };
+EXPORT_SYMBOL_GPL(fs_holder_ops);
 
 static int set_bdev_super(struct super_block *s, void *data)
 {
-       s->s_bdev = data;
-       s->s_dev = s->s_bdev->bd_dev;
-       s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
-
-       if (bdev_stable_writes(s->s_bdev))
-               s->s_iflags |= SB_I_STABLE_WRITES;
+       s->s_dev = *(dev_t *)data;
        return 0;
 }
 
@@ -1244,8 +1431,63 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 
 static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
 {
-       return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
+       return !(s->s_iflags & SB_I_RETIRED) &&
+               s->s_dev == *(dev_t *)fc->sget_key;
+}
+
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+               struct fs_context *fc)
+{
+       blk_mode_t mode = sb_open_mode(sb_flags);
+       struct block_device *bdev;
+
+       bdev = blkdev_get_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
+       if (IS_ERR(bdev)) {
+               if (fc)
+                       errorf(fc, "%s: Can't open blockdev", fc->source);
+               return PTR_ERR(bdev);
+       }
+
+       /*
+        * This really should be in blkdev_get_by_dev, but right now it can't due
+        * to legacy issues that require us to allow opening a block device node
+        * writable from userspace even for a read-only block device.
+        */
+       if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+               blkdev_put(bdev, sb);
+               return -EACCES;
+       }
+
+       /*
+        * Until SB_BORN flag is set, there can be no active superblock
+        * references and thus no filesystem freezing. get_active_super() will
+        * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed.
+        *
+        * It is enough to check bdev was not frozen before we set s_bdev.
+        */
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (bdev->bd_fsfreeze_count > 0) {
+               mutex_unlock(&bdev->bd_fsfreeze_mutex);
+               if (fc)
+                       warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+               blkdev_put(bdev, sb);
+               return -EBUSY;
+       }
+       spin_lock(&sb_lock);
+       sb->s_bdev = bdev;
+       sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
+       if (bdev_stable_writes(bdev))
+               sb->s_iflags |= SB_I_STABLE_WRITES;
+       spin_unlock(&sb_lock);
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+
+       snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
+       shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
+                               sb->s_id);
+       sb_set_blocksize(sb, block_size(bdev));
+       return 0;
 }
+EXPORT_SYMBOL_GPL(setup_bdev_super);
 
 /**
  * get_tree_bdev - Get a superblock based on a single block device
@@ -1256,73 +1498,49 @@ int get_tree_bdev(struct fs_context *fc,
                int (*fill_super)(struct super_block *,
                                  struct fs_context *))
 {
-       struct block_device *bdev;
        struct super_block *s;
        int error = 0;
+       dev_t dev;
 
        if (!fc->source)
                return invalf(fc, "No source specified");
 
-       bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags),
-                                 fc->fs_type, &fs_holder_ops);
-       if (IS_ERR(bdev)) {
-               errorf(fc, "%s: Can't open blockdev", fc->source);
-               return PTR_ERR(bdev);
-       }
-
-       /* Once the superblock is inserted into the list by sget_fc(), s_umount
-        * will protect the lockfs code from trying to start a snapshot while
-        * we are mounting
-        */
-       mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-               blkdev_put(bdev, fc->fs_type);
-               return -EBUSY;
+       error = lookup_bdev(fc->source, &dev);
+       if (error) {
+               errorf(fc, "%s: Can't lookup blockdev", fc->source);
+               return error;
        }
 
        fc->sb_flags |= SB_NOSEC;
-       fc->sget_key = bdev;
+       fc->sget_key = &dev;
        s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
-       mutex_unlock(&bdev->bd_fsfreeze_mutex);
-       if (IS_ERR(s)) {
-               blkdev_put(bdev, fc->fs_type);
+       if (IS_ERR(s))
                return PTR_ERR(s);
-       }
 
        if (s->s_root) {
                /* Don't summarily change the RO/RW state. */
                if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
-                       warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+                       warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
                        deactivate_locked_super(s);
-                       blkdev_put(bdev, fc->fs_type);
                        return -EBUSY;
                }
-
+       } else {
                /*
-                * s_umount nests inside open_mutex during
-                * __invalidate_device().  blkdev_put() acquires
-                * open_mutex and can't be called under s_umount.  Drop
-                * s_umount temporarily.  This is safe as we're
-                * holding an active reference.
+                * We drop s_umount here because we need to open the bdev and
+                * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+                * bdev_mark_dead()). It is safe because we hold an active sb
+                * reference and SB_BORN is not set yet.
                 */
-               up_write(&s->s_umount);
-               blkdev_put(bdev, fc->fs_type);
-               down_write(&s->s_umount);
-       } else {
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-               shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-                                       fc->fs_type->name, s->s_id);
-               sb_set_blocksize(s, block_size(bdev));
-               error = fill_super(s, fc);
+               super_unlock_excl(s);
+               error = setup_bdev_super(s, fc->sb_flags, fc);
+               __super_lock_excl(s);
+               if (!error)
+                       error = fill_super(s, fc);
                if (error) {
                        deactivate_locked_super(s);
                        return error;
                }
-
                s->s_flags |= SB_ACTIVE;
-               bdev->bd_super = s;
        }
 
        BUG_ON(fc->root);
@@ -1333,79 +1551,52 @@ EXPORT_SYMBOL(get_tree_bdev);
 
 static int test_bdev_super(struct super_block *s, void *data)
 {
-       return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
+       return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
 }
 
 struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int))
 {
-       struct block_device *bdev;
        struct super_block *s;
-       int error = 0;
+       int error;
+       dev_t dev;
 
-       bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
-                                 &fs_holder_ops);
-       if (IS_ERR(bdev))
-               return ERR_CAST(bdev);
+       error = lookup_bdev(dev_name, &dev);
+       if (error)
+               return ERR_PTR(error);
 
-       /*
-        * once the super is inserted into the list by sget, s_umount
-        * will protect the lockfs code from trying to start a snapshot
-        * while we are mounting
-        */
-       mutex_lock(&bdev->bd_fsfreeze_mutex);
-       if (bdev->bd_fsfreeze_count > 0) {
-               mutex_unlock(&bdev->bd_fsfreeze_mutex);
-               error = -EBUSY;
-               goto error_bdev;
-       }
-       s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
-                bdev);
-       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+       flags |= SB_NOSEC;
+       s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
        if (IS_ERR(s))
-               goto error_s;
+               return ERR_CAST(s);
 
        if (s->s_root) {
                if ((flags ^ s->s_flags) & SB_RDONLY) {
                        deactivate_locked_super(s);
-                       error = -EBUSY;
-                       goto error_bdev;
+                       return ERR_PTR(-EBUSY);
                }
-
+       } else {
                /*
-                * s_umount nests inside open_mutex during
-                * __invalidate_device().  blkdev_put() acquires
-                * open_mutex and can't be called under s_umount.  Drop
-                * s_umount temporarily.  This is safe as we're
-                * holding an active reference.
+                * We drop s_umount here because we need to open the bdev and
+                * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+                * bdev_mark_dead()). It is safe because we hold an active sb
+                * reference and SB_BORN is not set yet.
                 */
-               up_write(&s->s_umount);
-               blkdev_put(bdev, fs_type);
-               down_write(&s->s_umount);
-       } else {
-               snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
-               shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
-                                       fs_type->name, s->s_id);
-               sb_set_blocksize(s, block_size(bdev));
-               error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+               super_unlock_excl(s);
+               error = setup_bdev_super(s, flags, NULL);
+               __super_lock_excl(s);
+               if (!error)
+                       error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
                if (error) {
                        deactivate_locked_super(s);
-                       goto error;
+                       return ERR_PTR(error);
                }
 
                s->s_flags |= SB_ACTIVE;
-               bdev->bd_super = s;
        }
 
        return dget(s->s_root);
-
-error_s:
-       error = PTR_ERR(s);
-error_bdev:
-       blkdev_put(bdev, fs_type);
-error:
-       return ERR_PTR(error);
 }
 EXPORT_SYMBOL(mount_bdev);
 
@@ -1413,10 +1604,11 @@ void kill_block_super(struct super_block *sb)
 {
        struct block_device *bdev = sb->s_bdev;
 
-       bdev->bd_super = NULL;
        generic_shutdown_super(sb);
-       sync_blockdev(bdev);
-       blkdev_put(bdev, sb->s_type);
+       if (bdev) {
+               sync_blockdev(bdev);
+               blkdev_put(bdev, sb);
+       }
 }
 
 EXPORT_SYMBOL(kill_block_super);
@@ -1533,13 +1725,13 @@ int vfs_get_tree(struct fs_context *fc)
        WARN_ON(!sb->s_bdi);
 
        /*
-        * Write barrier is for super_cache_count(). We place it before setting
-        * SB_BORN as the data dependency between the two functions is the
-        * superblock structure contents that we just set up, not the SB_BORN
-        * flag.
+        * super_wake() contains a memory barrier which also takes care of
+        * ordering for super_cache_count(). We place it before setting
+        * SB_BORN as the data dependency between the two functions is
+        * the superblock structure contents that we just set up, not
+        * the SB_BORN flag.
         */
-       smp_wmb();
-       sb->s_flags |= SB_BORN;
+       super_wake(sb, SB_BORN);
 
        error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
        if (unlikely(error)) {
@@ -1644,14 +1836,43 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
                percpu_up_write(sb->s_writers.rw_sem + level);
 }
 
+static int wait_for_partially_frozen(struct super_block *sb)
+{
+       int ret = 0;
+
+       do {
+               unsigned short old = sb->s_writers.frozen;
+
+               up_write(&sb->s_umount);
+               ret = wait_var_event_killable(&sb->s_writers.frozen,
+                                              sb->s_writers.frozen != old);
+               down_write(&sb->s_umount);
+       } while (ret == 0 &&
+                sb->s_writers.frozen != SB_UNFROZEN &&
+                sb->s_writers.frozen != SB_FREEZE_COMPLETE);
+
+       return ret;
+}
+
 /**
  * freeze_super - lock the filesystem and force it into a consistent state
  * @sb: the super to lock
+ * @who: context that wants to freeze
  *
  * Syncs the super to make sure the filesystem is consistent and calls the fs's
- * freeze_fs.  Subsequent calls to this without first thawing the fs will return
+ * freeze_fs.  Subsequent calls to this without first thawing the fs may return
  * -EBUSY.
  *
+ * @who should be:
+ * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs;
+ * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs.
+ *
+ * The @who argument distinguishes between the kernel and userspace trying to
+ * freeze the filesystem.  Although there cannot be multiple kernel freezes or
+ * multiple userspace freezes in effect at any given time, the kernel and
+ * userspace can both hold a filesystem frozen.  The filesystem remains frozen
+ * until there are no kernel or userspace freezes in effect.
+ *
  * During this function, sb->s_writers.frozen goes through these values:
  *
  * SB_UNFROZEN: File system is normal, all writes progress as usual.
@@ -1677,34 +1898,62 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
  *
  * sb->s_writers.frozen is protected by sb->s_umount.
  */
-int freeze_super(struct super_block *sb)
+int freeze_super(struct super_block *sb, enum freeze_holder who)
 {
        int ret;
 
        atomic_inc(&sb->s_active);
-       down_write(&sb->s_umount);
+       if (!super_lock_excl(sb))
+               WARN(1, "Dying superblock while freezing!");
+
+retry:
+       if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
+               if (sb->s_writers.freeze_holders & who) {
+                       deactivate_locked_super(sb);
+                       return -EBUSY;
+               }
+
+               WARN_ON(sb->s_writers.freeze_holders == 0);
+
+               /*
+                * Someone else already holds this type of freeze; share the
+                * freeze and assign the active ref to the freeze.
+                */
+               sb->s_writers.freeze_holders |= who;
+               super_unlock_excl(sb);
+               return 0;
+       }
+
        if (sb->s_writers.frozen != SB_UNFROZEN) {
-               deactivate_locked_super(sb);
-               return -EBUSY;
+               ret = wait_for_partially_frozen(sb);
+               if (ret) {
+                       deactivate_locked_super(sb);
+                       return ret;
+               }
+
+               goto retry;
        }
 
        if (!(sb->s_flags & SB_BORN)) {
-               up_write(&sb->s_umount);
+               super_unlock_excl(sb);
                return 0;       /* sic - it's "nothing to do" */
        }
 
        if (sb_rdonly(sb)) {
                /* Nothing to do really... */
+               sb->s_writers.freeze_holders |= who;
                sb->s_writers.frozen = SB_FREEZE_COMPLETE;
-               up_write(&sb->s_umount);
+               wake_up_var(&sb->s_writers.frozen);
+               super_unlock_excl(sb);
                return 0;
        }
 
        sb->s_writers.frozen = SB_FREEZE_WRITE;
        /* Release s_umount to preserve sb_start_write -> s_umount ordering */
-       up_write(&sb->s_umount);
+       super_unlock_excl(sb);
        sb_wait_write(sb, SB_FREEZE_WRITE);
-       down_write(&sb->s_umount);
+       if (!super_lock_excl(sb))
+               WARN(1, "Dying superblock while freezing!");
 
        /* Now we go and block page faults... */
        sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
@@ -1715,6 +1964,7 @@ int freeze_super(struct super_block *sb)
        if (ret) {
                sb->s_writers.frozen = SB_UNFROZEN;
                sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
+               wake_up_var(&sb->s_writers.frozen);
                deactivate_locked_super(sb);
                return ret;
        }
@@ -1730,6 +1980,7 @@ int freeze_super(struct super_block *sb)
                                "VFS:Filesystem freeze failed\n");
                        sb->s_writers.frozen = SB_UNFROZEN;
                        sb_freeze_unlock(sb, SB_FREEZE_FS);
+                       wake_up_var(&sb->s_writers.frozen);
                        deactivate_locked_super(sb);
                        return ret;
                }
@@ -1738,24 +1989,50 @@ int freeze_super(struct super_block *sb)
         * For debugging purposes so that fs can warn if it sees write activity
         * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
         */
+       sb->s_writers.freeze_holders |= who;
        sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+       wake_up_var(&sb->s_writers.frozen);
        lockdep_sb_freeze_release(sb);
-       up_write(&sb->s_umount);
+       super_unlock_excl(sb);
        return 0;
 }
 EXPORT_SYMBOL(freeze_super);
 
-static int thaw_super_locked(struct super_block *sb)
+/*
+ * Undoes the effect of a freeze_super() call.  If the filesystem is
+ * frozen both by userspace and the kernel, a thaw call from either source
+ * removes that state without releasing the other state or unlocking the
+ * filesystem.
+ */
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
 {
        int error;
 
-       if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
-               up_write(&sb->s_umount);
+       if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
+               if (!(sb->s_writers.freeze_holders & who)) {
+                       super_unlock_excl(sb);
+                       return -EINVAL;
+               }
+
+               /*
+                * Freeze is shared with someone else.  Release our hold and
+                * drop the active ref that freeze_super assigned to the
+                * freezer.
+                */
+               if (sb->s_writers.freeze_holders & ~who) {
+                       sb->s_writers.freeze_holders &= ~who;
+                       deactivate_locked_super(sb);
+                       return 0;
+               }
+       } else {
+               super_unlock_excl(sb);
                return -EINVAL;
        }
 
        if (sb_rdonly(sb)) {
+               sb->s_writers.freeze_holders &= ~who;
                sb->s_writers.frozen = SB_UNFROZEN;
+               wake_up_var(&sb->s_writers.frozen);
                goto out;
        }
 
@@ -1764,15 +2041,16 @@ static int thaw_super_locked(struct super_block *sb)
        if (sb->s_op->unfreeze_fs) {
                error = sb->s_op->unfreeze_fs(sb);
                if (error) {
-                       printk(KERN_ERR
-                               "VFS:Filesystem thaw failed\n");
+                       printk(KERN_ERR "VFS:Filesystem thaw failed\n");
                        lockdep_sb_freeze_release(sb);
-                       up_write(&sb->s_umount);
+                       super_unlock_excl(sb);
                        return error;
                }
        }
 
+       sb->s_writers.freeze_holders &= ~who;
        sb->s_writers.frozen = SB_UNFROZEN;
+       wake_up_var(&sb->s_writers.frozen);
        sb_freeze_unlock(sb, SB_FREEZE_FS);
 out:
        deactivate_locked_super(sb);
@@ -1782,13 +2060,20 @@ out:
 /**
  * thaw_super -- unlock filesystem
  * @sb: the super to thaw
+ * @who: context that wants to thaw
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super()
+ * if there are no remaining freezes on the filesystem.
  *
- * Unlocks the filesystem and marks it writeable again after freeze_super().
+ * @who should be:
+ * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs;
+ * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs.
  */
-int thaw_super(struct super_block *sb)
+int thaw_super(struct super_block *sb, enum freeze_holder who)
 {
-       down_write(&sb->s_umount);
-       return thaw_super_locked(sb);
+       if (!super_lock_excl(sb))
+               WARN(1, "Dying superblock while thawing!");
+       return thaw_super_locked(sb, who);
 }
 EXPORT_SYMBOL(thaw_super);
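
The freeze rework above lets the kernel and userspace each hold the same frozen state once: freeze_holders is a two-bit mask, a second freeze from the other holder simply joins the existing freeze, and the filesystem only actually thaws when the mask empties. A hedged sketch of just the holder accounting, stripped of locking and of the real freeze/thaw work:

	#include <errno.h>

	#define FREEZE_HOLDER_KERNEL	(1U << 0)
	#define FREEZE_HOLDER_USERSPACE	(1U << 1)

	struct frozen_state {
		unsigned int holders;	/* like sb->s_writers.freeze_holders */
		int frozen;		/* 0 == unfrozen */
	};

	static int freeze_sketch(struct frozen_state *st, unsigned int who)
	{
		if (st->frozen) {
			if (st->holders & who)
				return -EBUSY;	/* already our freeze */
			st->holders |= who;	/* share the freeze */
			return 0;
		}
		/* ... sync and freeze the filesystem here ... */
		st->holders |= who;
		st->frozen = 1;
		return 0;
	}

	static int thaw_sketch(struct frozen_state *st, unsigned int who)
	{
		if (!st->frozen || !(st->holders & who))
			return -EINVAL;
		st->holders &= ~who;
		if (st->holders)
			return 0;	/* other holder keeps it frozen */
		/* ... actually thaw the filesystem here ... */
		st->frozen = 0;
		return 0;
	}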
 
index 0140010..2f5ead8 100644 (file)
@@ -224,7 +224,7 @@ got_it:
        memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2);
        de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
        dir_commit_chunk(page, pos, SYSV_DIRSIZE);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        err = sysv_handle_dirsync(dir);
 out_page:
@@ -249,7 +249,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
        }
        de->inode = 0;
        dir_commit_chunk(page, pos, SYSV_DIRSIZE);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        return sysv_handle_dirsync(inode);
 }
@@ -346,7 +346,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
        }
        de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
        dir_commit_chunk(page, pos, SYSV_DIRSIZE);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        return sysv_handle_dirsync(inode);
 }
index e732879..6719da5 100644 (file)
@@ -165,7 +165,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
        dirty_sb(sb);
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
        inode->i_ino = fs16_to_cpu(sbi, ino);
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_blocks = 0;
        memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data));
        SYSV_I(inode)->i_dir_start_lookup = 0;
index 9e8d4a6..0aa3827 100644 (file)
@@ -202,8 +202,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
        inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
        inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
        inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
-       inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_ctime);
-       inode->i_ctime.tv_nsec = 0;
+       inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->i_ctime), 0);
        inode->i_atime.tv_nsec = 0;
        inode->i_mtime.tv_nsec = 0;
        inode->i_blocks = 0;
@@ -256,7 +255,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
        raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size);
        raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec);
        raw_inode->i_mtime = cpu_to_fs32(sbi, inode->i_mtime.tv_sec);
-       raw_inode->i_ctime = cpu_to_fs32(sbi, inode->i_ctime.tv_sec);
+       raw_inode->i_ctime = cpu_to_fs32(sbi, inode_get_ctime(inode).tv_sec);
 
        si = SYSV_I(inode);
        if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
index 58d7f43..edb94e5 100644 (file)
@@ -183,7 +183,7 @@ static inline int splice_branch(struct inode *inode,
        *where->p = where->key;
        write_unlock(&pointers_lock);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
 
        /* had we spliced it onto indirect block? */
        if (where->bh)
@@ -423,7 +423,7 @@ do_indirects:
                }
                n++;
        }
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        if (IS_SYNC(inode))
                sysv_sync_inode (inode);
        else
@@ -449,7 +449,8 @@ int sysv_getattr(struct mnt_idmap *idmap, const struct path *path,
                 struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct super_block *s = path->dentry->d_sb;
-       generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+                        stat);
        stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
        stat->blksize = s->s_blocksize;
        return 0;
index fcf163f..d6b7379 100644 (file)
@@ -103,7 +103,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 {
        struct inode *inode = d_inode(old_dentry);
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_link_count(inode);
        ihold(inode);
 
@@ -161,7 +161,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
 
        err = sysv_delete_entry(de, page);
        if (!err) {
-               inode->i_ctime = dir->i_ctime;
+               inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
                inode_dec_link_count(inode);
        }
        unmap_and_put_page(page, de);
@@ -230,7 +230,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                unmap_and_put_page(new_page, new_de);
                if (err)
                        goto out_dir;
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                if (dir_de)
                        drop_nlink(new_inode);
                inode_dec_link_count(new_inode);
index 57ac8aa..2feb6c5 100644 (file)
@@ -132,7 +132,7 @@ static struct inode *tracefs_get_inode(struct super_block *sb)
        struct inode *inode = new_inode(sb);
        if (inode) {
                inode->i_ino = get_next_ino();
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        }
        return inode;
 }
index 9c9d3f0..eef9e52 100644 (file)
@@ -243,8 +243,8 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
               (unsigned int)inode->i_mtime.tv_sec,
               (unsigned int)inode->i_mtime.tv_nsec);
        pr_err("\tctime          %u.%u\n",
-              (unsigned int)inode->i_ctime.tv_sec,
-              (unsigned int)inode->i_ctime.tv_nsec);
+              (unsigned int)inode_get_ctime(inode).tv_sec,
+              (unsigned int)inode_get_ctime(inode).tv_nsec);
        pr_err("\tcreat_sqnum    %llu\n", ui->creat_sqnum);
        pr_err("\txattr_size     %u\n", ui->xattr_size);
        pr_err("\txattr_cnt      %u\n", ui->xattr_cnt);
index ef0499e..2f48c58 100644 (file)
@@ -96,8 +96,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
        inode->i_flags |= S_NOCMTIME;
 
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
-       inode->i_mtime = inode->i_atime = inode->i_ctime =
-                        current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_mapping->nrpages = 0;
 
        if (!is_xattr) {
@@ -325,7 +324,7 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir,
        mutex_lock(&dir_ui->ui_mutex);
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
        if (err)
                goto out_cancel;
@@ -765,10 +764,10 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 
        inc_nlink(inode);
        ihold(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
        if (err)
                goto out_cancel;
@@ -838,11 +837,11 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
        }
 
        lock_2_inodes(dir, inode);
-       inode->i_ctime = current_time(dir);
+       inode_set_ctime_current(inode);
        drop_nlink(inode);
        dir->i_size -= sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
        if (err)
                goto out_cancel;
@@ -940,12 +939,12 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
        }
 
        lock_2_inodes(dir, inode);
-       inode->i_ctime = current_time(dir);
+       inode_set_ctime_current(inode);
        clear_nlink(inode);
        drop_nlink(dir);
        dir->i_size -= sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
        if (err)
                goto out_cancel;
@@ -1019,7 +1018,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
        inc_nlink(dir);
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
        if (err) {
                ubifs_err(c, "cannot create directory, error %d", err);
@@ -1110,7 +1109,7 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
        mutex_lock(&dir_ui->ui_mutex);
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
        if (err)
                goto out_cancel;
@@ -1210,7 +1209,7 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
        mutex_lock(&dir_ui->ui_mutex);
        dir->i_size += sz_change;
        dir_ui->ui_size = dir->i_size;
-       dir->i_mtime = dir->i_ctime = inode->i_ctime;
+       dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
        err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
        if (err)
                goto out_cancel;
@@ -1298,7 +1297,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
                        .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
        struct ubifs_budget_req wht_req;
-       struct timespec64 time;
        unsigned int saved_nlink;
        struct fscrypt_name old_nm, new_nm;
 
@@ -1414,8 +1412,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
         * Like most other Unix systems, set the @i_ctime for inodes on a
         * rename.
         */
-       time = current_time(old_dir);
-       old_inode->i_ctime = time;
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
        /* We must adjust parent link count when renaming directories */
        if (is_dir) {
@@ -1444,13 +1441,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        old_dir->i_size -= old_sz;
        ubifs_inode(old_dir)->ui_size = old_dir->i_size;
-       old_dir->i_mtime = old_dir->i_ctime = time;
-       new_dir->i_mtime = new_dir->i_ctime = time;
 
        /*
         * And finally, if we unlinked a direntry which happened to have the
         * same name as the moved direntry, we have to decrement @i_nlink of
-        * the unlinked inode and change its ctime.
+        * the unlinked inode.
         */
        if (unlink) {
                /*
@@ -1462,7 +1457,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
                        clear_nlink(new_inode);
                else
                        drop_nlink(new_inode);
-               new_inode->i_ctime = time;
        } else {
                new_dir->i_size += new_sz;
                ubifs_inode(new_dir)->ui_size = new_dir->i_size;
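
simple_rename_timestamp() replaces the open-coded stamping of four inodes with one helper, which is why the local `time` variable and the per-inode assignments disappear above. Roughly, and hedged as a paraphrase of the fs/libfs.c helper rather than a verbatim copy, its effect is:

/* Paraphrased sketch of the helper's effect. */
static void rename_timestamp_sketch(struct inode *old_dir, struct dentry *old_dentry,
				    struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *victim = d_inode(new_dentry); /* displaced target, may be NULL */

	old_dir->i_mtime = inode_set_ctime_current(old_dir);
	if (new_dir != old_dir)
		new_dir->i_mtime = inode_set_ctime_current(new_dir);
	inode_set_ctime_current(d_inode(old_dentry));
	if (victim)
		inode_set_ctime_current(victim);
}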
@@ -1557,7 +1551,6 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
        int sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
        struct inode *fst_inode = d_inode(old_dentry);
        struct inode *snd_inode = d_inode(new_dentry);
-       struct timespec64 time;
        int err;
        struct fscrypt_name fst_nm, snd_nm;
 
@@ -1588,11 +1581,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
 
        lock_4_inodes(old_dir, new_dir, NULL, NULL);
 
-       time = current_time(old_dir);
-       fst_inode->i_ctime = time;
-       snd_inode->i_ctime = time;
-       old_dir->i_mtime = old_dir->i_ctime = time;
-       new_dir->i_mtime = new_dir->i_ctime = time;
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
 
        if (old_dir != new_dir) {
                if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) {
@@ -1665,7 +1654,7 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
                                STATX_ATTR_ENCRYPTED |
                                STATX_ATTR_IMMUTABLE);
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        stat->blksize = UBIFS_BLOCK_SIZE;
        stat->size = ui->ui_size;
 
index 6738fe4..e5382f0 100644 (file)
@@ -1092,7 +1092,7 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
        if (attr->ia_valid & ATTR_MTIME)
                inode->i_mtime = attr->ia_mtime;
        if (attr->ia_valid & ATTR_CTIME)
-               inode->i_ctime = attr->ia_ctime;
+               inode_set_ctime_to_ts(inode, attr->ia_ctime);
        if (attr->ia_valid & ATTR_MODE) {
                umode_t mode = attr->ia_mode;
 
@@ -1192,7 +1192,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
        mutex_lock(&ui->ui_mutex);
        ui->ui_size = inode->i_size;
        /* Truncation changes inode [mc]time */
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        /* Other attributes may be changed at the same time as well */
        do_attr_changes(inode, attr);
        err = ubifs_jnl_truncate(c, inode, old_size, new_size);
@@ -1239,7 +1239,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
        mutex_lock(&ui->ui_mutex);
        if (attr->ia_valid & ATTR_SIZE) {
                /* Truncation changes inode [mc]time */
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                /* 'truncate_setsize()' changed @i_size, update @ui_size */
                ui->ui_size = inode->i_size;
        }
@@ -1364,8 +1364,10 @@ out:
 static inline int mctime_update_needed(const struct inode *inode,
                                       const struct timespec64 *now)
 {
+       struct timespec64 ctime = inode_get_ctime(inode);
+
        if (!timespec64_equal(&inode->i_mtime, now) ||
-           !timespec64_equal(&inode->i_ctime, now))
+           !timespec64_equal(&ctime, now))
                return 1;
        return 0;
 }
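
The local copy is needed because inode_get_ctime() returns a struct timespec64 by value, and timespec64_equal() takes pointers; the address of the call's result cannot be taken directly. Minimal illustration:

/* Sketch: compare the current ctime against a candidate timestamp. */
static bool ctime_matches(const struct inode *inode, const struct timespec64 *now)
{
	struct timespec64 ctime = inode_get_ctime(inode);

	return timespec64_equal(&ctime, now);
}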
@@ -1376,8 +1378,7 @@ static inline int mctime_update_needed(const struct inode *inode,
  *
  * This function updates time of the inode.
  */
-int ubifs_update_time(struct inode *inode, struct timespec64 *time,
-                            int flags)
+int ubifs_update_time(struct inode *inode, int flags)
 {
        struct ubifs_inode *ui = ubifs_inode(inode);
        struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1385,21 +1386,17 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
                        .dirtied_ino_d = ALIGN(ui->data_len, 8) };
        int err, release;
 
-       if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
-               return generic_update_time(inode, time, flags);
+       if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) {
+               generic_update_time(inode, flags);
+               return 0;
+       }
 
        err = ubifs_budget_space(c, &req);
        if (err)
                return err;
 
        mutex_lock(&ui->ui_mutex);
-       if (flags & S_ATIME)
-               inode->i_atime = *time;
-       if (flags & S_CTIME)
-               inode->i_ctime = *time;
-       if (flags & S_MTIME)
-               inode->i_mtime = *time;
-
+       inode_update_timestamps(inode, flags);
        release = ui->dirty;
        __mark_inode_dirty(inode, I_DIRTY_SYNC);
        mutex_unlock(&ui->ui_mutex);
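
With the timespec64 argument gone from ->update_time, implementations stamp the inode themselves; inode_update_timestamps() applies the current time to whichever of atime/ctime/mtime the S_ATIME/S_CTIME/S_MTIME bits in @flags select. A bare-bones implementation under the new signature might look like this (illustrative name):

/* Sketch of a minimal ->update_time after this interface change. */
static int example_update_time(struct inode *inode, int flags)
{
	inode_update_timestamps(inode, flags);
	mark_inode_dirty_sync(inode);
	return 0;
}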
@@ -1432,7 +1429,7 @@ static int update_mctime(struct inode *inode)
                        return err;
 
                mutex_lock(&ui->ui_mutex);
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                release = ui->dirty;
                mark_inode_dirty_sync(inode);
                mutex_unlock(&ui->ui_mutex);
@@ -1570,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
                struct ubifs_inode *ui = ubifs_inode(inode);
 
                mutex_lock(&ui->ui_mutex);
-               inode->i_mtime = inode->i_ctime = current_time(inode);
+               inode->i_mtime = inode_set_ctime_current(inode);
                release = ui->dirty;
                mark_inode_dirty_sync(inode);
                mutex_unlock(&ui->ui_mutex);
index 67c5108..d79cabe 100644 (file)
@@ -118,7 +118,7 @@ static int setflags(struct inode *inode, int flags)
        ui->flags &= ~ioctl2ubifs(UBIFS_SETTABLE_IOCTL_FLAGS);
        ui->flags |= ioctl2ubifs(flags);
        ubifs_set_inode_flags(inode);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        release = ui->dirty;
        mark_inode_dirty_sync(inode);
        mutex_unlock(&ui->ui_mutex);
index dc52ac0..ffc9bee 100644 (file)
@@ -454,8 +454,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
        ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
        ino->atime_sec  = cpu_to_le64(inode->i_atime.tv_sec);
        ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
-       ino->ctime_sec  = cpu_to_le64(inode->i_ctime.tv_sec);
-       ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+       ino->ctime_sec  = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+       ino->ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
        ino->mtime_sec  = cpu_to_le64(inode->i_mtime.tv_sec);
        ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
        ino->uid   = cpu_to_le32(i_uid_read(inode));
index 32cb147..b08fb28 100644 (file)
@@ -146,8 +146,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
        inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
        inode->i_mtime.tv_sec  = (int64_t)le64_to_cpu(ino->mtime_sec);
        inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
-       inode->i_ctime.tv_sec  = (int64_t)le64_to_cpu(ino->ctime_sec);
-       inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec);
+       inode_set_ctime(inode, (int64_t)le64_to_cpu(ino->ctime_sec),
+                       le32_to_cpu(ino->ctime_nsec));
        inode->i_mode = le32_to_cpu(ino->mode);
        inode->i_size = le64_to_cpu(ino->size);
 
index 4c36044..ebb3ad6 100644 (file)
@@ -2027,7 +2027,7 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
 int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
 int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
                  struct iattr *attr);
-int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int ubifs_update_time(struct inode *inode, int flags);
 
 /* dir.c */
 struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
index 349228d..406c82e 100644 (file)
@@ -134,7 +134,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
        ui->data_len = size;
 
        mutex_lock(&host_ui->ui_mutex);
-       host->i_ctime = current_time(host);
+       inode_set_ctime_current(host);
        host_ui->xattr_cnt += 1;
        host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm));
        host_ui->xattr_size += CALC_XATTR_BYTES(size);
@@ -215,7 +215,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
        ui->data_len = size;
 
        mutex_lock(&host_ui->ui_mutex);
-       host->i_ctime = current_time(host);
+       inode_set_ctime_current(host);
        host_ui->xattr_size -= CALC_XATTR_BYTES(old_size);
        host_ui->xattr_size += CALC_XATTR_BYTES(size);
 
@@ -474,7 +474,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
                return err;
 
        mutex_lock(&host_ui->ui_mutex);
-       host->i_ctime = current_time(host);
+       inode_set_ctime_current(host);
        host_ui->xattr_cnt -= 1;
        host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm));
        host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
index 5f7ac8c..6b558cb 100644 (file)
@@ -100,7 +100,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
                iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
        else
                iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        iinfo->i_crtime = inode->i_mtime;
        if (unlikely(insert_inode_locked(inode) < 0)) {
                make_bad_inode(inode);
index 28cdfc5..d089795 100644 (file)
@@ -910,7 +910,7 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
        map->oflags = UDF_BLK_NEW | UDF_BLK_MAPPED;
        iinfo->i_next_alloc_block = map->lblk + 1;
        iinfo->i_next_alloc_goal = newblocknum + 1;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
 
        if (IS_SYNC(inode))
                udf_sync_inode(inode);
@@ -1298,7 +1298,7 @@ set_size:
                        goto out_unlock;
        }
 update_time:
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        if (IS_SYNC(inode))
                udf_sync_inode(inode);
        else
@@ -1329,6 +1329,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode)
        int bs = inode->i_sb->s_blocksize;
        int ret = -EIO;
        uint32_t uid, gid;
+       struct timespec64 ctime;
 
 reread:
        if (iloc->partitionReferenceNum >= sbi->s_partitions) {
@@ -1507,7 +1508,8 @@ reread:
 
                udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime);
                udf_disk_stamp_to_time(&inode->i_mtime, fe->modificationTime);
-               udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime);
+               udf_disk_stamp_to_time(&ctime, fe->attrTime);
+               inode_set_ctime_to_ts(inode, ctime);
 
                iinfo->i_unique = le64_to_cpu(fe->uniqueID);
                iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
@@ -1522,7 +1524,8 @@ reread:
                udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime);
                udf_disk_stamp_to_time(&inode->i_mtime, efe->modificationTime);
                udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime);
-               udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime);
+               udf_disk_stamp_to_time(&ctime, efe->attrTime);
+               inode_set_ctime_to_ts(inode, ctime);
 
                iinfo->i_unique = le64_to_cpu(efe->uniqueID);
                iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
@@ -1799,7 +1802,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 
                udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
                udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
-               udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
+               udf_time_to_disk_stamp(&fe->attrTime, inode_get_ctime(inode));
                memset(&(fe->impIdent), 0, sizeof(struct regid));
                strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
                fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1830,12 +1833,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
 
                udf_adjust_time(iinfo, inode->i_atime);
                udf_adjust_time(iinfo, inode->i_mtime);
-               udf_adjust_time(iinfo, inode->i_ctime);
+               udf_adjust_time(iinfo, inode_get_ctime(inode));
 
                udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime);
                udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime);
                udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
-               udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
+               udf_time_to_disk_stamp(&efe->attrTime, inode_get_ctime(inode));
 
                memset(&(efe->impIdent), 0, sizeof(efe->impIdent));
                strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
index a95579b..ae55ab8 100644 (file)
@@ -365,7 +365,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
        *(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
                cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
        udf_fiiter_write_fi(&iter, NULL);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        udf_fiiter_release(&iter);
        udf_add_fid_counter(dir->i_sb, false, 1);
@@ -471,7 +471,7 @@ static int udf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
        udf_fiiter_release(&iter);
        udf_add_fid_counter(dir->i_sb, true, 1);
        inc_nlink(dir);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        d_instantiate_new(dentry, inode);
 
@@ -523,8 +523,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
        inode->i_size = 0;
        inode_dec_link_count(dir);
        udf_add_fid_counter(dir->i_sb, true, -1);
-       inode->i_ctime = dir->i_ctime = dir->i_mtime =
-                                               current_time(inode);
+       dir->i_mtime = inode_set_ctime_to_ts(dir,
+                                            inode_set_ctime_current(inode));
        mark_inode_dirty(dir);
        ret = 0;
 end_rmdir:
@@ -555,11 +555,11 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
                set_nlink(inode, 1);
        }
        udf_fiiter_delete_entry(&iter);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        inode_dec_link_count(inode);
        udf_add_fid_counter(dir->i_sb, false, -1);
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
        ret = 0;
 end_unlink:
        udf_fiiter_release(&iter);
@@ -746,9 +746,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 
        inc_nlink(inode);
        udf_add_fid_counter(dir->i_sb, false, 1);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        ihold(inode);
        d_instantiate(dentry, inode);
@@ -833,7 +833,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
        mark_inode_dirty(old_inode);
 
        /*
@@ -861,13 +861,13 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
        }
 
        if (new_inode) {
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                inode_dec_link_count(new_inode);
                udf_add_fid_counter(old_dir->i_sb, S_ISDIR(new_inode->i_mode),
                                    -1);
        }
-       old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
-       new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
+       old_dir->i_mtime = inode_set_ctime_current(old_dir);
+       new_dir->i_mtime = inode_set_ctime_current(new_dir);
        mark_inode_dirty(old_dir);
        mark_inode_dirty(new_dir);
 
index 779b5c2..f7eaf7b 100644 (file)
@@ -149,7 +149,7 @@ static int udf_symlink_getattr(struct mnt_idmap *idmap,
        struct inode *inode = d_backing_inode(dentry);
        struct page *page;
 
-       generic_fillattr(&nop_mnt_idmap, inode, stat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
        page = read_mapping_page(inode->i_mapping, 0, NULL);
        if (IS_ERR(page))
                return PTR_ERR(page);
index 379d757..fd57f03 100644 (file)
@@ -107,7 +107,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
        ufs_commit_chunk(page, pos, len);
        ufs_put_page(page);
        if (update_times)
-               dir->i_mtime = dir->i_ctime = current_time(dir);
+               dir->i_mtime = inode_set_ctime_current(dir);
        mark_inode_dirty(dir);
        ufs_handle_dirsync(dir);
 }
@@ -397,7 +397,7 @@ got_it:
        ufs_set_de_type(sb, de, inode->i_mode);
 
        ufs_commit_chunk(page, pos, rec_len);
-       dir->i_mtime = dir->i_ctime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
 
        mark_inode_dirty(dir);
        err = ufs_handle_dirsync(dir);
@@ -539,7 +539,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
                pde->d_reclen = cpu_to_fs16(sb, to - from);
        dir->d_ino = 0;
        ufs_commit_chunk(page, pos, to - from);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
        err = ufs_handle_dirsync(inode);
 out:
index 06bd84d..a1e7bd9 100644 (file)
@@ -292,7 +292,7 @@ cg_found:
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
        inode->i_blocks = 0;
        inode->i_generation = 0;
-       inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        ufsi->i_flags = UFS_I(dir)->i_flags;
        ufsi->i_lastfrag = 0;
        ufsi->i_shadow = 0;
index a4246c8..21a4779 100644 (file)
@@ -296,7 +296,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
 
        if (new)
                *new = 1;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        if (IS_SYNC(inode))
                ufs_sync_inode (inode);
        mark_inode_dirty(inode);
@@ -378,7 +378,7 @@ ufs_inode_getblock(struct inode *inode, u64 ind_block,
        mark_buffer_dirty(bh);
        if (IS_SYNC(inode))
                sync_dirty_buffer(bh);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 out:
        brelse (bh);
@@ -580,11 +580,12 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
 
        inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
        inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
-       inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+       inode_set_ctime(inode,
+                       (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec),
+                       0);
        inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
        inode->i_mtime.tv_nsec = 0;
        inode->i_atime.tv_nsec = 0;
-       inode->i_ctime.tv_nsec = 0;
        inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
        inode->i_generation = fs32_to_cpu(sb, ufs_inode->ui_gen);
        ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
@@ -626,10 +627,10 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 
        inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
        inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
-       inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime);
+       inode_set_ctime(inode, fs64_to_cpu(sb, ufs2_inode->ui_ctime),
+                       fs32_to_cpu(sb, ufs2_inode->ui_ctimensec));
        inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime);
        inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec);
-       inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec);
        inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec);
        inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
        inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
@@ -726,7 +727,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
        ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
        ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
        ufs_inode->ui_atime.tv_usec = 0;
-       ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
+       ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb,
+                                                inode_get_ctime(inode).tv_sec);
        ufs_inode->ui_ctime.tv_usec = 0;
        ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
        ufs_inode->ui_mtime.tv_usec = 0;
@@ -770,8 +772,9 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
        ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
        ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
        ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec);
-       ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec);
-       ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec);
+       ufs_inode->ui_ctime = cpu_to_fs64(sb, inode_get_ctime(inode).tv_sec);
+       ufs_inode->ui_ctimensec = cpu_to_fs32(sb,
+                                             inode_get_ctime(inode).tv_nsec);
        ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec);
        ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec);
 
@@ -1205,7 +1208,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
        truncate_setsize(inode, size);
 
        ufs_truncate_blocks(inode);
-       inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        mark_inode_dirty(inode);
 out:
        UFSD("EXIT: err %d\n", err);
index 36154b5..9cad294 100644 (file)
@@ -153,7 +153,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
        struct inode *inode = d_inode(old_dentry);
        int error;
 
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_link_count(inode);
        ihold(inode);
 
@@ -220,7 +220,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
        if (err)
                goto out;
 
-       inode->i_ctime = dir->i_ctime;
+       inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
        inode_dec_link_count(inode);
        err = 0;
 out:
@@ -282,7 +282,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
                if (!new_de)
                        goto out_dir;
                ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
-               new_inode->i_ctime = current_time(new_inode);
+               inode_set_ctime_current(new_inode);
                if (dir_de)
                        drop_nlink(new_inode);
                inode_dec_link_count(new_inode);
@@ -298,7 +298,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
         * Like most other Unix systems, set the ctime for inodes on a
         * rename.
         */
-       old_inode->i_ctime = current_time(old_inode);
+       inode_set_ctime_current(old_inode);
 
        ufs_delete_entry(old_dir, old_de, old_page);
        mark_inode_dirty(old_inode);
index 075f15c..5f1a14d 100644 (file)
@@ -179,9 +179,10 @@ static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx)
        return 0;
 }
 
+WRAP_DIR_ITER(vboxsf_dir_iterate) // FIXME!
 const struct file_operations vboxsf_dir_fops = {
        .open = vboxsf_dir_open,
-       .iterate = vboxsf_dir_iterate,
+       .iterate_shared = shared_vboxsf_dir_iterate,
        .release = vboxsf_dir_release,
        .read = generic_read_dir,
        .llseek = generic_file_llseek,
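
WRAP_DIR_ITER() generates the shared_vboxsf_dir_iterate() symbol wired into .iterate_shared above: a shim that lets a legacy exclusive-lock ->iterate body run under the shared-lock directory iteration protocol. Paraphrased, the expansion is roughly:

/* Rough paraphrase of the macro expansion, not a verbatim copy. */
static int shared_vboxsf_dir_iterate(struct file *file, struct dir_context *ctx)
{
	/* drops the shared inode lock, retakes it exclusive around the
	 * legacy iterator, then returns to the shared lock */
	return wrap_directory_iterator(file, ctx, vboxsf_dir_iterate);
}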
index aca8290..069a019 100644 (file)
@@ -68,9 +68,9 @@ struct shfl_string {
 
        /** UTF-8 or UTF-16 string. Nul terminated. */
        union {
-               u8 utf8[2];
-               u16 utf16[1];
-               u16 ucs2[1]; /* misnomer, use utf16. */
+               u8 legacy_padding[2];
+               DECLARE_FLEX_ARRAY(u8, utf8);
+               DECLARE_FLEX_ARRAY(u16, utf16);
        } string;
 };
 VMMDEV_ASSERT_SIZE(shfl_string, 6);
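
A bare flexible array member (`u8 utf8[];`) is not valid inside a union, so DECLARE_FLEX_ARRAY() from <linux/stddef.h> wraps it in an anonymous struct to keep the declaration legal while preserving compiler bounds checking. An illustrative standalone use, with hypothetical names:

#include <linux/stddef.h>
#include <linux/types.h>

struct example_string {
	u16 size;
	union {
		u8 legacy_padding[2];          /* preserves the historic sizeof */
		DECLARE_FLEX_ARRAY(u8, bytes); /* flex array, legal in a union */
	} payload;
};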
index dd0ae11..83f20dd 100644 (file)
@@ -128,8 +128,8 @@ int vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
 
        inode->i_atime = ns_to_timespec64(
                                 info->access_time.ns_relative_to_unix_epoch);
-       inode->i_ctime = ns_to_timespec64(
-                                info->change_time.ns_relative_to_unix_epoch);
+       inode_set_ctime_to_ts(inode,
+                             ns_to_timespec64(info->change_time.ns_relative_to_unix_epoch));
        inode->i_mtime = ns_to_timespec64(
                           info->modification_time.ns_relative_to_unix_epoch);
        return 0;
@@ -252,7 +252,7 @@ int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path,
        if (err)
                return err;
 
-       generic_fillattr(&nop_mnt_idmap, d_inode(dentry), kstat);
+       generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), kstat);
        return 0;
 }
 
index 49bf3a1..d071a6e 100644 (file)
@@ -118,16 +118,16 @@ void fsverity_free_info(struct fsverity_info *vi);
 int fsverity_get_descriptor(struct inode *inode,
                            struct fsverity_descriptor **desc_ret);
 
-int __init fsverity_init_info_cache(void);
-void __init fsverity_exit_info_cache(void);
+void __init fsverity_init_info_cache(void);
 
 /* signature.c */
 
 #ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+extern int fsverity_require_signatures;
 int fsverity_verify_signature(const struct fsverity_info *vi,
                              const u8 *signature, size_t sig_size);
 
-int __init fsverity_init_signature(void);
+void __init fsverity_init_signature(void);
 #else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
 static inline int
 fsverity_verify_signature(const struct fsverity_info *vi,
@@ -136,15 +136,13 @@ fsverity_verify_signature(const struct fsverity_info *vi,
        return 0;
 }
 
-static inline int fsverity_init_signature(void)
+static inline void fsverity_init_signature(void)
 {
-       return 0;
 }
 #endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
 
 /* verify.c */
 
-int __init fsverity_init_workqueue(void);
-void __init fsverity_exit_workqueue(void);
+void __init fsverity_init_workqueue(void);
 
 #endif /* _FSVERITY_PRIVATE_H */
index c598d20..6b08b1d 100644 (file)
@@ -226,6 +226,14 @@ void __init fsverity_check_hash_algs(void)
                if (!alg->name)
                        continue;
 
+               /*
+                * 0 must never be allocated as an FS_VERITY_HASH_ALG_* value,
+                * as it is reserved for users that use 0 to mean unspecified or
+                * a default value.  fs/verity/ itself doesn't care and doesn't
+                * have a default algorithm, but some users make use of this.
+                */
+               BUG_ON(i == 0);
+
                BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);
 
                /*
index 0239051..a29f062 100644 (file)
@@ -9,6 +9,37 @@
 
 #include <linux/ratelimit.h>
 
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *fsverity_sysctl_header;
+
+static struct ctl_table fsverity_sysctl_table[] = {
+#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+       {
+               .procname       = "require_signatures",
+               .data           = &fsverity_require_signatures,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE,
+       },
+#endif
+       { }
+};
+
+static void __init fsverity_init_sysctl(void)
+{
+       fsverity_sysctl_header = register_sysctl("fs/verity",
+                                                fsverity_sysctl_table);
+       if (!fsverity_sysctl_header)
+               panic("fsverity sysctl registration failed");
+}
+#else /* CONFIG_SYSCTL */
+static inline void fsverity_init_sysctl(void)
+{
+}
+#endif /* !CONFIG_SYSCTL */
+
 void fsverity_msg(const struct inode *inode, const char *level,
                  const char *fmt, ...)
 {
@@ -33,28 +64,11 @@ void fsverity_msg(const struct inode *inode, const char *level,
 
 static int __init fsverity_init(void)
 {
-       int err;
-
        fsverity_check_hash_algs();
-
-       err = fsverity_init_info_cache();
-       if (err)
-               return err;
-
-       err = fsverity_init_workqueue();
-       if (err)
-               goto err_exit_info_cache;
-
-       err = fsverity_init_signature();
-       if (err)
-               goto err_exit_workqueue;
-
+       fsverity_init_info_cache();
+       fsverity_init_workqueue();
+       fsverity_init_sysctl();
+       fsverity_init_signature();
        return 0;
-
-err_exit_workqueue:
-       fsverity_exit_workqueue();
-err_exit_info_cache:
-       fsverity_exit_info_cache();
-       return err;
 }
 late_initcall(fsverity_init)
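
The conversions in this series share one idea: these __init functions run once at boot and allocate trivial amounts of memory, so failure is treated as fatal (panic, or SLAB_PANIC on the cache) and all of the error-unwinding paths, including the *_exit_* helpers, can be deleted. The resulting shape, sketched with illustrative names:

/* Sketch: boot-time setup that cannot meaningfully fail just panics. */
static struct workqueue_struct *example_wq;

static void __init example_init_workqueue(void)
{
	example_wq = alloc_workqueue("example", WQ_HIGHPRI, 0);
	if (!example_wq)
		panic("failed to allocate example workqueue");
}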
index 1db5106..6c31a87 100644 (file)
@@ -408,18 +408,10 @@ void __fsverity_cleanup_inode(struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode);
 
-int __init fsverity_init_info_cache(void)
+void __init fsverity_init_info_cache(void)
 {
-       fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
-                                                  SLAB_RECLAIM_ACCOUNT,
-                                                  file_digest);
-       if (!fsverity_info_cachep)
-               return -ENOMEM;
-       return 0;
-}
-
-void __init fsverity_exit_info_cache(void)
-{
-       kmem_cache_destroy(fsverity_info_cachep);
-       fsverity_info_cachep = NULL;
+       fsverity_info_cachep = KMEM_CACHE_USERCOPY(
+                                       fsverity_info,
+                                       SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
+                                       file_digest);
 }
index 72034bc..90c0757 100644 (file)
@@ -24,7 +24,7 @@
  * /proc/sys/fs/verity/require_signatures
  * If 1, all verity files must have a valid builtin signature.
  */
-static int fsverity_require_signatures;
+int fsverity_require_signatures;
 
 /*
  * Keyring that contains the trusted X.509 certificates.
@@ -62,6 +62,22 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
                return 0;
        }
 
+       if (fsverity_keyring->keys.nr_leaves_on_tree == 0) {
+               /*
+                * The ".fs-verity" keyring is empty, due to builtin signatures
+                * being supported by the kernel but not actually being used.
+                * In this case, verify_pkcs7_signature() would always return an
+                * error, usually ENOKEY.  It could also be EBADMSG if the
+                * PKCS#7 is malformed, but that isn't very important to
+                * distinguish.  So, just skip to ENOKEY to avoid the attack
+                * surface of the PKCS#7 parser, which would otherwise be
+                * reachable by any task able to execute FS_IOC_ENABLE_VERITY.
+                */
+               fsverity_err(inode,
+                            "fs-verity keyring is empty, rejecting signed file!");
+               return -ENOKEY;
+       }
+
        d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
        if (!d)
                return -ENOMEM;
@@ -93,59 +109,14 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
        return 0;
 }
 
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *fsverity_sysctl_header;
-
-static struct ctl_table fsverity_sysctl_table[] = {
-       {
-               .procname       = "require_signatures",
-               .data           = &fsverity_require_signatures,
-               .maxlen         = sizeof(int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
-               .extra1         = SYSCTL_ZERO,
-               .extra2         = SYSCTL_ONE,
-       },
-       { }
-};
-
-static int __init fsverity_sysctl_init(void)
-{
-       fsverity_sysctl_header = register_sysctl("fs/verity", fsverity_sysctl_table);
-       if (!fsverity_sysctl_header) {
-               pr_err("sysctl registration failed!\n");
-               return -ENOMEM;
-       }
-       return 0;
-}
-#else /* !CONFIG_SYSCTL */
-static inline int __init fsverity_sysctl_init(void)
+void __init fsverity_init_signature(void)
 {
-       return 0;
-}
-#endif /* !CONFIG_SYSCTL */
-
-int __init fsverity_init_signature(void)
-{
-       struct key *ring;
-       int err;
-
-       ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
-                            current_cred(), KEY_POS_SEARCH |
+       fsverity_keyring =
+               keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
+                             current_cred(), KEY_POS_SEARCH |
                                KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
                                KEY_USR_SEARCH | KEY_USR_SETATTR,
-                            KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
-       if (IS_ERR(ring))
-               return PTR_ERR(ring);
-
-       err = fsverity_sysctl_init();
-       if (err)
-               goto err_put_ring;
-
-       fsverity_keyring = ring;
-       return 0;
-
-err_put_ring:
-       key_put(ring);
-       return err;
+                             KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+       if (IS_ERR(fsverity_keyring))
+               panic("failed to allocate \".fs-verity\" keyring");
 }
index 433cef5..904ccd7 100644 (file)
@@ -346,7 +346,7 @@ void fsverity_enqueue_verify_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
 
-int __init fsverity_init_workqueue(void)
+void __init fsverity_init_workqueue(void)
 {
        /*
         * Use a high-priority workqueue to prioritize verification work, which
@@ -360,12 +360,5 @@ int __init fsverity_init_workqueue(void)
                                                  WQ_HIGHPRI,
                                                  num_online_cpus());
        if (!fsverity_read_workqueue)
-               return -ENOMEM;
-       return 0;
-}
-
-void __init fsverity_exit_workqueue(void)
-{
-       destroy_workqueue(fsverity_read_workqueue);
-       fsverity_read_workqueue = NULL;
+               panic("failed to allocate fsverity_read_queue");
 }
index e7bbb7f..efd4736 100644 (file)
@@ -1040,12 +1040,32 @@ const char *xattr_full_name(const struct xattr_handler *handler,
 EXPORT_SYMBOL(xattr_full_name);
 
 /**
- * free_simple_xattr - free an xattr object
+ * simple_xattr_space - estimate the memory used by a simple xattr
+ * @name: the full name of the xattr
+ * @size: the size of its value
+ *
+ * This takes no account of how much larger the two slab objects actually are:
+ * that would depend on the slab implementation, when what is required is a
+ * deterministic number, which grows with name length and size and quantity.
+ *
+ * Return: The approximate number of bytes of memory used by such an xattr.
+ */
+size_t simple_xattr_space(const char *name, size_t size)
+{
+       /*
+        * Use "40" instead of sizeof(struct simple_xattr), to return the
+        * same result on 32-bit and 64-bit, and even if simple_xattr grows.
+        */
+       return 40 + size + strlen(name);
+}
+
+/**
+ * simple_xattr_free - free an xattr object
  * @xattr: the xattr object
  *
  * Free the xattr object. Can handle @xattr being NULL.
  */
-static inline void free_simple_xattr(struct simple_xattr *xattr)
+void simple_xattr_free(struct simple_xattr *xattr)
 {
        if (xattr)
                kfree(xattr->name);
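
The constant 40 makes the estimate architecture-independent: setting "user.origin" (11 bytes of name) with a 100-byte value is accounted as 40 + 100 + 11 = 151 bytes, whatever sizeof(struct simple_xattr) happens to be. Usage is just:

/* Illustrative: a deterministic per-xattr memory charge. */
size_t charge = simple_xattr_space("user.origin", 100); /* == 151 */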
@@ -1073,7 +1093,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
        if (len < sizeof(*new_xattr))
                return NULL;
 
-       new_xattr = kvmalloc(len, GFP_KERNEL);
+       new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
        if (!new_xattr)
                return NULL;
 
@@ -1164,7 +1184,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * @value: the value to store along the xattr
  * @size: the size of @value
  * @flags: the flags determining how to set the xattr
- * @removed_size: the size of the removed xattr
  *
  * Set a new xattr object.
  * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
@@ -1181,29 +1200,27 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
  * XATTR_REPLACE we fail as mentioned above.
  *
- * Return: On success zero and on error a negative error code is returned.
+ * Return: On success, the removed or replaced xattr is returned, to be freed
+ * by the caller; or NULL if none. On failure a negative error code is returned.
  */
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
-                    const void *value, size_t size, int flags,
-                    ssize_t *removed_size)
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+                                     const char *name, const void *value,
+                                     size_t size, int flags)
 {
-       struct simple_xattr *xattr = NULL, *new_xattr = NULL;
+       struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
        struct rb_node *parent = NULL, **rbp;
        int err = 0, ret;
 
-       if (removed_size)
-               *removed_size = -1;
-
        /* value == NULL means remove */
        if (value) {
                new_xattr = simple_xattr_alloc(value, size);
                if (!new_xattr)
-                       return -ENOMEM;
+                       return ERR_PTR(-ENOMEM);
 
-               new_xattr->name = kstrdup(name, GFP_KERNEL);
+               new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
                if (!new_xattr->name) {
-                       free_simple_xattr(new_xattr);
-                       return -ENOMEM;
+                       simple_xattr_free(new_xattr);
+                       return ERR_PTR(-ENOMEM);
                }
        }
 
@@ -1217,12 +1234,12 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
                else if (ret > 0)
                        rbp = &(*rbp)->rb_right;
                else
-                       xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
-               if (xattr)
+                       old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
+               if (old_xattr)
                        break;
        }
 
-       if (xattr) {
+       if (old_xattr) {
                /* Fail if XATTR_CREATE is requested and the xattr exists. */
                if (flags & XATTR_CREATE) {
                        err = -EEXIST;
@@ -1230,12 +1247,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
                }
 
                if (new_xattr)
-                       rb_replace_node(&xattr->rb_node, &new_xattr->rb_node,
-                                       &xattrs->rb_root);
+                       rb_replace_node(&old_xattr->rb_node,
+                                       &new_xattr->rb_node, &xattrs->rb_root);
                else
-                       rb_erase(&xattr->rb_node, &xattrs->rb_root);
-               if (!err && removed_size)
-                       *removed_size = xattr->size;
+                       rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
        } else {
                /* Fail if XATTR_REPLACE is requested but no xattr is found. */
                if (flags & XATTR_REPLACE) {
@@ -1260,12 +1275,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
 
 out_unlock:
        write_unlock(&xattrs->lock);
-       if (err)
-               free_simple_xattr(new_xattr);
-       else
-               free_simple_xattr(xattr);
-       return err;
-
+       if (!err)
+               return old_xattr;
+       simple_xattr_free(new_xattr);
+       return ERR_PTR(err);
 }
 
 static bool xattr_is_trusted(const char *name)
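
Under the new contract the displaced xattr is handed back instead of being freed internally, so a caller can account for the released memory (via simple_xattr_space()) before dropping the object. A hedged sketch of a caller, with illustrative names:

/* Sketch: set an xattr and release whatever it displaced. */
static int example_xattr_set(struct simple_xattrs *xattrs, const char *name,
			     const void *value, size_t size, int flags)
{
	struct simple_xattr *old;

	old = simple_xattr_set(xattrs, name, value, size, flags);
	if (IS_ERR(old))
		return PTR_ERR(old);
	/* e.g. uncharge simple_xattr_space(old->name, old->size) here */
	simple_xattr_free(old);	/* NULL-safe */
	return 0;
}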
@@ -1370,14 +1383,17 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
 /**
  * simple_xattrs_free - free xattrs
  * @xattrs: xattr header whose xattrs to destroy
+ * @freed_space: approximate number of bytes of memory freed from @xattrs
  *
  * Destroy all xattrs in @xattr. When this is called no one can hold a
  * reference to any of the xattrs anymore.
  */
-void simple_xattrs_free(struct simple_xattrs *xattrs)
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
 {
        struct rb_node *rbp;
 
+       if (freed_space)
+               *freed_space = 0;
        rbp = rb_first(&xattrs->rb_root);
        while (rbp) {
                struct simple_xattr *xattr;
@@ -1386,7 +1402,10 @@ void simple_xattrs_free(struct simple_xattrs *xattrs)
                rbp_next = rb_next(rbp);
                xattr = rb_entry(rbp, struct simple_xattr, rb_node);
                rb_erase(&xattr->rb_node, &xattrs->rb_root);
-               free_simple_xattr(xattr);
+               if (freed_space)
+                       *freed_space += simple_xattr_space(xattr->name,
+                                                          xattr->size);
+               simple_xattr_free(xattr);
                rbp = rbp_next;
        }
 }
index 25e2841..f9015f8 100644 (file)
@@ -591,7 +591,7 @@ struct xfs_attr_shortform {
                uint8_t valuelen;       /* actual length of value (no NULL) */
                uint8_t flags;  /* flags bits (see xfs_attr_leaf.h) */
                uint8_t nameval[];      /* name & value bytes concatenated */
-       } list[1];                      /* variable sized array */
+       } list[];                       /* variable sized array */
 };
 
 typedef struct xfs_attr_leaf_map {     /* RLE map of free bytes */
@@ -620,19 +620,29 @@ typedef struct xfs_attr_leaf_entry {      /* sorted on key, not name */
 typedef struct xfs_attr_leaf_name_local {
        __be16  valuelen;               /* number of bytes in value */
        __u8    namelen;                /* length of name bytes */
-       __u8    nameval[1];             /* name/value bytes */
+       /*
+        * In Linux 6.5 this flex array was converted from nameval[1] to
+        * nameval[].  Be very careful here about extra padding at the end;
+        * see xfs_attr_leaf_entsize_local() for details.
+        */
+       __u8    nameval[];              /* name/value bytes */
 } xfs_attr_leaf_name_local_t;
 
 typedef struct xfs_attr_leaf_name_remote {
        __be32  valueblk;               /* block number of value bytes */
        __be32  valuelen;               /* number of bytes in value */
        __u8    namelen;                /* length of name bytes */
-       __u8    name[1];                /* name bytes */
+       /*
+        * In Linux 6.5 this flex array was converted from name[1] to name[].
+        * Be very careful here about extra padding at the end; see
+        * xfs_attr_leaf_entsize_remote() for details.
+        */
+       __u8    name[];                 /* name bytes */
 } xfs_attr_leaf_name_remote_t;
 
 typedef struct xfs_attr_leafblock {
        xfs_attr_leaf_hdr_t     hdr;    /* constant-structure header block */
-       xfs_attr_leaf_entry_t   entries[1];     /* sorted on key, not name */
+       xfs_attr_leaf_entry_t   entries[];      /* sorted on key, not name */
        /*
         * The rest of the block contains the following structures after the
         * leaf entries, growing from the bottom up. The variables are never
@@ -664,7 +674,7 @@ struct xfs_attr3_leaf_hdr {
 
 struct xfs_attr3_leafblock {
        struct xfs_attr3_leaf_hdr       hdr;
-       struct xfs_attr_leaf_entry      entries[1];
+       struct xfs_attr_leaf_entry      entries[];
 
        /*
         * The rest of the block contains the following structures after the
@@ -747,14 +757,61 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
  */
 static inline int xfs_attr_leaf_entsize_remote(int nlen)
 {
-       return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 +
-                       nlen, XFS_ATTR_LEAF_NAME_ALIGN);
+       /*
+        * Prior to Linux 6.5, struct xfs_attr_leaf_name_remote ended with
+        * name[1], which was used as a flexarray.  The layout of this struct
+        * is 9 bytes of fixed-length fields followed by a __u8 flex array at
+        * offset 9.
+        *
+        * On most architectures, struct xfs_attr_leaf_name_remote had two
+        * bytes of implicit padding at the end of the struct to make the
+        * struct length 12.  After converting name[1] to name[], there are
+        * three implicit padding bytes and the struct size remains 12.
+        * However, there are compiler configurations that do not add implicit
+        * padding at all (m68k) and have been broken for years.
+        *
+        * This entsize computation historically added (the xattr name length)
+        * to (the padded struct length - 1) and rounded that sum up to the
+        * nearest multiple of 4 (NAME_ALIGN).  IOWs, round_up(11 + nlen, 4).
+        * This is encoded in the ondisk format, so we cannot change this.
+        *
+        * Compute the entsize from offsetof of the flexarray and manually
+        * adding bytes for the implicit padding.
+        */
+       const size_t remotesize =
+                       offsetof(struct xfs_attr_leaf_name_remote, name) + 2;
+
+       return round_up(remotesize + nlen, XFS_ATTR_LEAF_NAME_ALIGN);
 }
 
 static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
 {
-       return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 +
-                       nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
+       /*
+        * Prior to Linux 6.5, struct xfs_attr_leaf_name_local ended with
+        * nameval[1], which was used as a flexarray.  The layout of this
+        * struct is 3 bytes of fixed-length fields followed by a __u8 flex
+        * array at offset 3.
+        *
+        * struct xfs_attr_leaf_name_local had zero bytes of implicit padding
+        * at the end of the struct to make the struct length 4.  On most
+        * architectures, after converting nameval[1] to nameval[], there is
+        * one implicit padding byte and the struct size remains 4.  However,
+        * there are compiler configurations that do not add implicit padding
+        * at all (m68k) and would break.
+        *
+        * This entsize computation historically added (the xattr name and
+        * value length) to (the padded struct length - 1) and rounded that sum
+        * up to the nearest multiple of 4 (NAME_ALIGN).  IOWs, the formula is
+        * round_up(3 + nlen + vlen, 4).  This is encoded in the ondisk format,
+        * so we cannot change this.
+        *
+        * Compute the entsize from offsetof of the flexarray and manually
+        * adding bytes for the implicit padding.
+        */
+       const size_t localsize =
+                       offsetof(struct xfs_attr_leaf_name_local, nameval);
+
+       return round_up(localsize + nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
 }
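
The local form checks out the same way: the flexarray sits at offset 3
and the old sizeof(...) - 1 constant was also 3.

	/*
	 * Worked example (not part of the patch): a 4-byte name with an
	 * 11-byte value costs round_up(3 + 4 + 11, 4) == 20 bytes under
	 * either formula.
	 */
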
 
 static inline int xfs_attr_leaf_entsize_local_max(int bsize)
index 9c60ebb..2cbf9ea 100644 (file)
@@ -592,12 +592,12 @@ typedef struct xfs_attrlist_cursor {
 struct xfs_attrlist {
        __s32   al_count;       /* number of entries in attrlist */
        __s32   al_more;        /* T/F: more attrs (do call again) */
-       __s32   al_offset[1];   /* byte offsets of attrs [var-sized] */
+       __s32   al_offset[];    /* byte offsets of attrs [var-sized] */
 };
 
 struct xfs_attrlist_ent {      /* data from attr_list() */
        __u32   a_valuelen;     /* number bytes in value of attr */
-       char    a_name[1];      /* attr name (NULL terminated) */
+       char    a_name[];       /* attr name (NULL terminated) */
 };
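
These are the userspace-visible XFS_IOC_ATTRLIST_BY_HANDLE buffer
structures; the flexarray conversion does not change the buffer layout,
since al_offset[] entries remain byte offsets from the start of the
buffer.  A minimal sketch of how a userspace caller might walk the list
(the function name is invented; assumes <stdio.h> and the xfs_fs.h uapi
header):

	static void example_walk_attrlist(struct xfs_attrlist *al)
	{
		int i;

		for (i = 0; i < al->al_count; i++) {
			/* each offset points from the buffer start */
			struct xfs_attrlist_ent *ent =
				(struct xfs_attrlist_ent *)
				((char *)al + al->al_offset[i]);

			printf("%s (%u value bytes)\n",
					ent->a_name, ent->a_valuelen);
		}
	}
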
 
 typedef struct xfs_fsop_attrlist_handlereq {
index 758aacd..a357815 100644 (file)
@@ -222,7 +222,8 @@ xfs_inode_from_disk(
         */
        inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
        inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
-       inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
+       inode_set_ctime_to_ts(inode,
+                             xfs_inode_from_disk_ts(from, from->di_ctime));
 
        ip->i_disk_size = be64_to_cpu(from->di_size);
        ip->i_nblocks = be64_to_cpu(from->di_nblocks);
@@ -316,7 +317,7 @@ xfs_inode_to_disk(
 
        to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
        to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
-       to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
+       to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
        to->di_nlink = cpu_to_be32(inode->i_nlink);
        to->di_gen = cpu_to_be32(inode->i_generation);
        to->di_mode = cpu_to_be16(inode->i_mode);
index cb4796b..ad22656 100644 (file)
@@ -62,12 +62,12 @@ xfs_trans_ichgtime(
        ASSERT(tp);
        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
 
-       tv = current_time(inode);
+       /* If the mtime changes, then ctime must also change */
+       ASSERT(flags & XFS_ICHGTIME_CHG);
 
+       tv = inode_set_ctime_current(inode);
        if (flags & XFS_ICHGTIME_MOD)
                inode->i_mtime = tv;
-       if (flags & XFS_ICHGTIME_CHG)
-               inode->i_ctime = tv;
        if (flags & XFS_ICHGTIME_CREATE)
                ip->i_crtime = tv;
 }
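
This hunk and the ones that follow all convert to the VFS ctime
accessors merged for 6.6: inode_get_ctime(), inode_set_ctime_current()
and inode_set_ctime_to_ts().  A minimal sketch of the pattern (the
function name is invented):

	static void example_touch(struct inode *inode)
	{
		struct timespec64 now;

		/* stamp ctime with the current time, capturing the value */
		now = inode_set_ctime_current(inode);
		/* reuse the same timestamp for mtime, as ichgtime does */
		inode->i_mtime = now;
		/* reads go through the accessor instead of inode->i_ctime */
		now = inode_get_ctime(inode);
	}
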
index e382a35..05be757 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Copyright (C) 2019-2023 Oracle.  All Rights Reserved.
  * Author: Darrick J. Wong <djwong@kernel.org>
@@ -8,6 +8,8 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_trans_resv.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
 #include "xfs_mount.h"
 #include "xfs_alloc.h"
 #include "xfs_ialloc.h"
@@ -16,6 +18,7 @@
 #include "xfs_ag.h"
 #include "xfs_rtalloc.h"
 #include "xfs_inode.h"
+#include "xfs_icache.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
        uint64_t                frextents;
        unsigned long long      icount_min;
        unsigned long long      icount_max;
+       bool                    frozen;
 };
 
 /*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
        return error;
 }
 
+static inline int
+xchk_fsfreeze(
+       struct xfs_scrub        *sc)
+{
+       int                     error;
+
+       error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+       trace_xchk_fsfreeze(sc, error);
+       return error;
+}
+
+static inline int
+xchk_fsthaw(
+       struct xfs_scrub        *sc)
+{
+       int                     error;
+
+       /* This should always succeed; we hold a kernel freeze. */
+       error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+       trace_xchk_fsthaw(sc, error);
+       return error;
+}
+
+/*
+ * We couldn't stabilize the filesystem long enough to sample all the variables
+ * that comprise the summary counters and compare them to the percpu counters.
+ * We need to disable all writer threads, which means taking the first two
+ * freeze levels to put userspace to sleep, and the third freeze level to
+ * prevent background threads from starting new transactions.  Take one level
+ * more to prevent other callers from unfreezing the filesystem while we run.
+ */
+STATIC int
+xchk_fscounters_freeze(
+       struct xfs_scrub        *sc)
+{
+       struct xchk_fscounters  *fsc = sc->buf;
+       int                     error = 0;
+
+       if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+               sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
+               mnt_drop_write_file(sc->file);
+       }
+
+       /* Try to grab a kernel freeze. */
+       while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
+               if (xchk_should_terminate(sc, &error))
+                       return error;
+
+               delay(HZ / 10);
+       }
+       if (error)
+               return error;
+
+       fsc->frozen = true;
+       return 0;
+}
+
+/* Thaw the filesystem after checking or repairing fscounters. */
+STATIC void
+xchk_fscounters_cleanup(
+       void                    *buf)
+{
+       struct xchk_fscounters  *fsc = buf;
+       struct xfs_scrub        *sc = fsc->sc;
+       int                     error;
+
+       if (!fsc->frozen)
+               return;
+
+       error = xchk_fsthaw(sc);
+       if (error)
+               xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
+       else
+               fsc->frozen = false;
+}
+
 int
 xchk_setup_fscounters(
        struct xfs_scrub        *sc)
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
        sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
        if (!sc->buf)
                return -ENOMEM;
+       sc->buf_cleanup = xchk_fscounters_cleanup;
        fsc = sc->buf;
        fsc->sc = sc;
 
@@ -150,7 +231,18 @@ xchk_setup_fscounters(
        if (error)
                return error;
 
-       return xchk_trans_alloc(sc, 0);
+       /*
+        * Pause all writer activity in the filesystem while we're scrubbing to
+        * reduce the likelihood of background perturbations to the counters
+        * throwing off our calculations.
+        */
+       if (sc->flags & XCHK_TRY_HARDER) {
+               error = xchk_fscounters_freeze(sc);
+               if (error)
+                       return error;
+       }
+
+       return xfs_trans_alloc_empty(sc->mp, &sc->tp);
 }
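
Returning -EDEADLOCK is the scrub convention for "retry with more
resources".  A simplified sketch of the dispatcher loop that consumes
it (the real loop lives in xfs_scrub_metadata() and is not shown in
this diff; error handling trimmed):

	retry_op:
		error = sc->ops->setup(sc);	/* freezes the fs on retry */
		if (!error)
			error = sc->ops->scrub(sc);
		if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER)) {
			/* drop all resources, then retry while frozen */
			if (!xchk_teardown(sc, 0)) {
				sc->flags |= XCHK_TRY_HARDER;
				goto retry_op;
			}
		}
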
 
 /*
@@ -290,8 +382,7 @@ retry:
        if (fsc->ifree > fsc->icount) {
                if (tries--)
                        goto retry;
-               xchk_set_incomplete(sc);
-               return 0;
+               return -EDEADLOCK;
        }
 
        return 0;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
  * Otherwise, we /might/ have a problem.  If the change in the summations is
  * more than we want to tolerate, the filesystem is probably busy and we should
  * just send back INCOMPLETE and see if userspace will try again.
+ *
+ * If we're repairing then we require an exact match.
  */
 static inline bool
 xchk_fscount_within_range(
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
        if (expected >= min_value && expected <= max_value)
                return true;
 
-       /*
-        * If the difference between the two summations is too large, the fs
-        * might just be busy and so we'll mark the scrub incomplete.  Return
-        * true here so that we don't mark the counter corrupt.
-        *
-        * XXX: In the future when userspace can grant scrub permission to
-        * quiesce the filesystem to solve the outsized variance problem, this
-        * check should be moved up and the return code changed to signal to
-        * userspace that we need quiesce permission.
-        */
-       if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
-               xchk_set_incomplete(sc);
-               return true;
-       }
-
+       /* Everything else is bad. */
        return false;
 }
 
@@ -422,6 +501,7 @@ xchk_fscounters(
        struct xfs_mount        *mp = sc->mp;
        struct xchk_fscounters  *fsc = sc->buf;
        int64_t                 icount, ifree, fdblocks, frextents;
+       bool                    try_again = false;
        int                     error;
 
        /* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
        frextents = percpu_counter_sum(&mp->m_frextents);
 
        /* No negative values, please! */
-       if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
+       if (icount < 0 || ifree < 0)
                xchk_set_corrupt(sc);
 
+       /*
+        * If the filesystem is not frozen, the counter summation calls above
+        * can race with xfs_mod_freecounter, which subtracts a requested space
+        * reservation from the counter and undoes the subtraction if that made
+        * the counter go negative.  Therefore, it's possible to see negative
+        * values here, and we should only flag that as a corruption if we
+        * froze the fs.  This is much more likely to happen with frextents
+        * since there are no reserved pools.
+        */
+       if (fdblocks < 0 || frextents < 0) {
+               if (!fsc->frozen)
+                       return -EDEADLOCK;
+
+               xchk_set_corrupt(sc);
+               return 0;
+       }
+
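
A simplified sketch of the reservation pattern that produces those
transient negative sums (the real logic is xfs_mod_freecounter() in
fs/xfs/xfs_mount.c; the function below is an illustration only):

	static int example_reserve(struct percpu_counter *counter, s64 delta)
	{
		/*
		 * Optimistic grab: percpu_counter_sum() from an unfrozen
		 * scrubber can observe the negative intermediate state here.
		 */
		percpu_counter_add_batch(counter, -delta, XFS_FDBLOCKS_BATCH);
		if (__percpu_counter_compare(counter, 0,
					     XFS_FDBLOCKS_BATCH) >= 0)
			return 0;

		/* undo the grab and fail the reservation */
		percpu_counter_add(counter, delta);
		return -ENOSPC;
	}
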
        /* See if icount is obviously wrong. */
        if (icount < fsc->icount_min || icount > fsc->icount_max)
                xchk_set_corrupt(sc);
@@ -447,12 +544,6 @@ xchk_fscounters(
                xchk_set_corrupt(sc);
 
        /*
-        * XXX: We can't quiesce percpu counter updates, so exit early.
-        * This can be re-enabled when we gain exclusive freeze functionality.
-        */
-       return 0;
-
-       /*
         * If ifree exceeds icount by more than the minimum variance then
         * something's probably wrong with the counters.
         */
@@ -463,8 +554,6 @@ xchk_fscounters(
        error = xchk_fscount_aggregate_agcounts(sc, fsc);
        if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
                return error;
-       if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
-               return 0;
 
        /* Count the free extents counter for rt volumes. */
        error = xchk_fscount_count_frextents(sc, fsc);
@@ -473,20 +562,45 @@ xchk_fscounters(
        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
                return 0;
 
-       /* Compare the in-core counters with whatever we counted. */
-       if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
-               xchk_set_corrupt(sc);
+       /*
+        * Compare the in-core counters with whatever we counted.  If the fs is
+        * frozen, we treat the discrepancy as a corruption because the freeze
+        * should have stabilized the counter values.  Otherwise, we need
+        * userspace to call us back having granted us freeze permission.
+        */
+       if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+                               fsc->icount)) {
+               if (fsc->frozen)
+                       xchk_set_corrupt(sc);
+               else
+                       try_again = true;
+       }
 
-       if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
-               xchk_set_corrupt(sc);
+       if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+               if (fsc->frozen)
+                       xchk_set_corrupt(sc);
+               else
+                       try_again = true;
+       }
 
        if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
-                       fsc->fdblocks))
-               xchk_set_corrupt(sc);
+                       fsc->fdblocks)) {
+               if (fsc->frozen)
+                       xchk_set_corrupt(sc);
+               else
+                       try_again = true;
+       }
 
        if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
-                       fsc->frextents))
-               xchk_set_corrupt(sc);
+                       fsc->frextents)) {
+               if (fsc->frozen)
+                       xchk_set_corrupt(sc);
+               else
+                       try_again = true;
+       }
+
+       if (try_again)
+               return -EDEADLOCK;
 
        return 0;
 }
index 3d98f60..a0fffbc 100644 (file)
@@ -184,8 +184,10 @@ xchk_teardown(
                        xchk_irele(sc, sc->ip);
                sc->ip = NULL;
        }
-       if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+       if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+               sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
                mnt_drop_write_file(sc->file);
+       }
        if (sc->buf) {
                if (sc->buf_cleanup)
                        sc->buf_cleanup(sc->buf);
@@ -505,6 +507,8 @@ retry_op:
                error = mnt_want_write_file(sc->file);
                if (error)
                        goto out_sc;
+
+               sc->flags |= XCHK_HAVE_FREEZE_PROT;
        }
 
        /* Set up for the operation. */
index e113f2f..f8ba00e 100644 (file)
@@ -106,6 +106,7 @@ struct xfs_scrub {
 
 /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
 #define XCHK_TRY_HARDER                (1U << 0)  /* can't get resources, try again */
+#define XCHK_HAVE_FREEZE_PROT  (1U << 1)  /* do we have freeze protection? */
 #define XCHK_FSGATES_DRAIN     (1U << 2)  /* defer ops draining enabled */
 #define XCHK_NEED_DRAIN                (1U << 3)  /* scrub needs to drain defer ops */
 #define XREP_ALREADY_FIXED     (1U << 31) /* checking our repair work */
index b3894da..0b54f1a 100644 (file)
@@ -98,6 +98,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
 
 #define XFS_SCRUB_STATE_STRINGS \
        { XCHK_TRY_HARDER,                      "try_harder" }, \
+       { XCHK_HAVE_FREEZE_PROT,                "nofreeze" }, \
        { XCHK_FSGATES_DRAIN,                   "fsgates_drain" }, \
        { XCHK_NEED_DRAIN,                      "need_drain" }, \
        { XREP_ALREADY_FIXED,                   "already_fixed" }
@@ -693,6 +694,31 @@ TRACE_EVENT(xchk_fscounters_within_range,
                  __entry->old_value)
 )
 
+DECLARE_EVENT_CLASS(xchk_fsfreeze_class,
+       TP_PROTO(struct xfs_scrub *sc, int error),
+       TP_ARGS(sc, error),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(unsigned int, type)
+               __field(int, error)
+       ),
+       TP_fast_assign(
+               __entry->dev = sc->mp->m_super->s_dev;
+               __entry->type = sc->sm->sm_type;
+               __entry->error = error;
+       ),
+       TP_printk("dev %d:%d type %s error %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+                 __entry->error)
+);
+#define DEFINE_XCHK_FSFREEZE_EVENT(name) \
+DEFINE_EVENT(xchk_fsfreeze_class, name, \
+       TP_PROTO(struct xfs_scrub *sc, int error), \
+       TP_ARGS(sc, error))
+DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze);
+DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw);
+
 TRACE_EVENT(xchk_refcount_incorrect,
        TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
                 xfs_nlink_t seen),
index 791db7d..6b84030 100644 (file)
@@ -233,7 +233,7 @@ xfs_acl_set_mode(
        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        inode->i_mode = mode;
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
        if (xfs_has_wsync(mp))
index 451942f..2fca4b4 100644 (file)
@@ -578,7 +578,7 @@ const struct address_space_operations xfs_address_space_operations = {
        .read_folio             = xfs_vm_read_folio,
        .readahead              = xfs_vm_readahead,
        .writepages             = xfs_vm_writepages,
-       .dirty_folio            = filemap_dirty_folio,
+       .dirty_folio            = iomap_dirty_folio,
        .release_folio          = iomap_release_folio,
        .invalidate_folio       = iomap_invalidate_folio,
        .bmap                   = xfs_vm_bmap,
index fbb6755..fcefab6 100644 (file)
@@ -1644,6 +1644,7 @@ xfs_swap_extents(
        uint64_t                f;
        int                     resblks = 0;
        unsigned int            flags = 0;
+       struct timespec64       ctime;
 
        /*
         * Lock the inodes against other IO, page faults and truncate to
@@ -1756,8 +1757,9 @@ xfs_swap_extents(
         * process that the file was not changed out from
         * under it.
         */
-       if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
-           (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
+       ctime = inode_get_ctime(VFS_I(ip));
+       if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) ||
+           (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) ||
            (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
            (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
                error = -EBUSY;
index 15d1e5a..3b903f6 100644 (file)
@@ -1938,14 +1938,17 @@ void
 xfs_free_buftarg(
        struct xfs_buftarg      *btp)
 {
+       struct block_device     *bdev = btp->bt_bdev;
+
        unregister_shrinker(&btp->bt_shrinker);
        ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
        percpu_counter_destroy(&btp->bt_io_count);
        list_lru_destroy(&btp->bt_lru);
 
-       blkdev_issue_flush(btp->bt_bdev);
-       invalidate_bdev(btp->bt_bdev);
        fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+       /* the main block device is closed by kill_block_super */
+       if (bdev != btp->bt_mount->m_super->s_bdev)
+               blkdev_put(bdev, btp->bt_mount->m_super);
 
        kmem_free(btp);
 }
index 9e62cc5..360fe83 100644 (file)
@@ -843,10 +843,9 @@ xfs_init_new_inode(
        ip->i_df.if_nextents = 0;
        ASSERT(ip->i_nblocks == 0);
 
-       tv = current_time(inode);
+       tv = inode_set_ctime_current(inode);
        inode->i_mtime = tv;
        inode->i_atime = tv;
-       inode->i_ctime = tv;
 
        ip->i_extsize = 0;
        ip->i_diflags = 0;
index 91c847a..127b241 100644 (file)
@@ -528,7 +528,7 @@ xfs_inode_to_log_dinode(
        memset(to->di_pad3, 0, sizeof(to->di_pad3));
        to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
        to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
-       to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime);
+       to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode));
        to->di_nlink = inode->i_nlink;
        to->di_gen = inode->i_generation;
        to->di_mode = inode->i_mode;
index 24718ad..2ededd3 100644 (file)
@@ -573,10 +573,10 @@ xfs_vn_getattr(
        stat->gid = vfsgid_into_kgid(vfsgid);
        stat->ino = ip->i_ino;
        stat->atime = inode->i_atime;
-       stat->mtime = inode->i_mtime;
-       stat->ctime = inode->i_ctime;
        stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
 
+       fill_mg_cmtime(stat, request_mask, inode);
+
        if (xfs_has_v3inodes(mp)) {
                if (request_mask & STATX_BTIME) {
                        stat->result_mask |= STATX_BTIME;
@@ -917,7 +917,7 @@ xfs_setattr_size(
        if (newsize != oldsize &&
            !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
                iattr->ia_ctime = iattr->ia_mtime =
-                       current_time(inode);
+                       current_mgtime(inode);
                iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
        }
 
@@ -1029,7 +1029,6 @@ xfs_vn_setattr(
 STATIC int
 xfs_vn_update_time(
        struct inode            *inode,
-       struct timespec64       *now,
        int                     flags)
 {
        struct xfs_inode        *ip = XFS_I(inode);
@@ -1037,13 +1036,16 @@ xfs_vn_update_time(
        int                     log_flags = XFS_ILOG_TIMESTAMP;
        struct xfs_trans        *tp;
        int                     error;
+       struct timespec64       now;
 
        trace_xfs_update_time(ip);
 
        if (inode->i_sb->s_flags & SB_LAZYTIME) {
                if (!((flags & S_VERSION) &&
-                     inode_maybe_inc_iversion(inode, false)))
-                       return generic_update_time(inode, now, flags);
+                     inode_maybe_inc_iversion(inode, false))) {
+                       generic_update_time(inode, flags);
+                       return 0;
+               }
 
                /* Capture the iversion update that just occurred */
                log_flags |= XFS_ILOG_CORE;
@@ -1054,12 +1056,15 @@ xfs_vn_update_time(
                return error;
 
        xfs_ilock(ip, XFS_ILOCK_EXCL);
-       if (flags & S_CTIME)
-               inode->i_ctime = *now;
+       if (flags & (S_CTIME|S_MTIME))
+               now = inode_set_ctime_current(inode);
+       else
+               now = current_time(inode);
+
        if (flags & S_MTIME)
-               inode->i_mtime = *now;
+               inode->i_mtime = now;
        if (flags & S_ATIME)
-               inode->i_atime = *now;
+               inode->i_atime = now;
 
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, log_flags);
index f225413..c2093cb 100644 (file)
@@ -100,8 +100,8 @@ xfs_bulkstat_one_int(
        buf->bs_atime_nsec = inode->i_atime.tv_nsec;
        buf->bs_mtime = inode->i_mtime.tv_sec;
        buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
-       buf->bs_ctime = inode->i_ctime.tv_sec;
-       buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+       buf->bs_ctime = inode_get_ctime(inode).tv_sec;
+       buf->bs_ctime_nsec = inode_get_ctime(inode).tv_nsec;
        buf->bs_gen = inode->i_generation;
        buf->bs_mode = inode->i_mode;
 
index 9737b5a..c4cc99b 100644 (file)
@@ -56,7 +56,7 @@ xfs_check_ondisk_structs(void)
 
        /* dir/attr trees */
        XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr,        80);
-       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock,       88);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock,       80);
        XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr,         56);
        XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo,           56);
        XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode,           64);
@@ -88,7 +88,8 @@ xfs_check_ondisk_structs(void)
        XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4);
        XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen,  8);
        XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name,     9);
-       XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t,             40);
+       XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t,             32);
+       XFS_CHECK_STRUCT_SIZE(struct xfs_attr_shortform,        4);
        XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.totsize, 0);
        XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.count,   2);
        XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].namelen,    4);
index 8185102..c79eac0 100644 (file)
@@ -377,17 +377,6 @@ disable_dax:
        return 0;
 }
 
-static void
-xfs_bdev_mark_dead(
-       struct block_device     *bdev)
-{
-       xfs_force_shutdown(bdev->bd_holder, SHUTDOWN_DEVICE_REMOVED);
-}
-
-static const struct blk_holder_ops xfs_holder_ops = {
-       .mark_dead              = xfs_bdev_mark_dead,
-};
-
 STATIC int
 xfs_blkdev_get(
        xfs_mount_t             *mp,
@@ -396,8 +385,8 @@ xfs_blkdev_get(
 {
        int                     error = 0;
 
-       *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, mp,
-                                   &xfs_holder_ops);
+       *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE,
+                                   mp->m_super, &fs_holder_ops);
        if (IS_ERR(*bdevp)) {
                error = PTR_ERR(*bdevp);
                xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
@@ -407,31 +396,45 @@ xfs_blkdev_get(
 }
 
 STATIC void
-xfs_blkdev_put(
-       struct xfs_mount        *mp,
-       struct block_device     *bdev)
-{
-       if (bdev)
-               blkdev_put(bdev, mp);
-}
-
-STATIC void
-xfs_close_devices(
+xfs_shutdown_devices(
        struct xfs_mount        *mp)
 {
+       /*
+        * Udev is triggered whenever anyone closes a block device or unmounts
+        * a file system on a block device.
+        * The default udev rules invoke blkid to read the fs super and create
+        * symlinks to the bdev under /dev/disk.  For this, it uses buffered
+        * reads through the page cache.
+        *
+        * xfs_db also uses buffered reads to examine metadata.  There is no
+        * coordination between xfs_db and udev, which means that they can run
+        * concurrently.  Note there is no coordination between the kernel and
+        * blkid either.
+        *
+        * On a system with 64k pages, the page cache can cache the superblock
+        * and the root inode (and hence the root directory) with the same 64k
+        * page.  If udev spawns blkid after the mkfs and the system is busy
+        * enough that it is still running when xfs_db starts up, they'll both
+        * read from the same page in the pagecache.
+        *
+        * The unmount writes updated inode metadata to disk directly.  The XFS
+        * buffer cache does not use the bdev pagecache, so it needs to
+        * invalidate that pagecache on unmount.  If the above scenario occurs,
+        * the pagecache no longer reflects what's on disk, xfs_db reads the
+        * stale metadata, and fails to find /a.  Most of the time this succeeds
+        * because closing a bdev invalidates the page cache, but when processes
+        * race, everyone loses.
+        */
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
-               struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-
-               xfs_free_buftarg(mp->m_logdev_targp);
-               xfs_blkdev_put(mp, logdev);
+               blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
+               invalidate_bdev(mp->m_logdev_targp->bt_bdev);
        }
        if (mp->m_rtdev_targp) {
-               struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-
-               xfs_free_buftarg(mp->m_rtdev_targp);
-               xfs_blkdev_put(mp, rtdev);
+               blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
+               invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
        }
-       xfs_free_buftarg(mp->m_ddev_targp);
+       blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+       invalidate_bdev(mp->m_ddev_targp->bt_bdev);
 }
 
 /*
@@ -448,17 +451,24 @@ STATIC int
 xfs_open_devices(
        struct xfs_mount        *mp)
 {
-       struct block_device     *ddev = mp->m_super->s_bdev;
+       struct super_block      *sb = mp->m_super;
+       struct block_device     *ddev = sb->s_bdev;
        struct block_device     *logdev = NULL, *rtdev = NULL;
        int                     error;
 
        /*
+        * blkdev_put() can't be called under s_umount; see the comment
+        * in get_tree_bdev() for more details.
+        */
+       up_write(&sb->s_umount);
+
+       /*
         * Open real time and log devices - order is important.
         */
        if (mp->m_logname) {
                error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
                if (error)
-                       return error;
+                       goto out_relock;
        }
 
        if (mp->m_rtname) {
@@ -496,7 +506,10 @@ xfs_open_devices(
                mp->m_logdev_targp = mp->m_ddev_targp;
        }
 
-       return 0;
+       error = 0;
+out_relock:
+       down_write(&sb->s_umount);
+       return error;
 
  out_free_rtdev_targ:
        if (mp->m_rtdev_targp)
@@ -504,11 +517,12 @@ xfs_open_devices(
  out_free_ddev_targ:
        xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
-       xfs_blkdev_put(mp, rtdev);
+       if (rtdev)
+               blkdev_put(rtdev, sb);
  out_close_logdev:
        if (logdev && logdev != ddev)
-               xfs_blkdev_put(mp, logdev);
-       return error;
+               blkdev_put(logdev, sb);
+       goto out_relock;
 }
 
 /*
@@ -758,6 +772,17 @@ static void
 xfs_mount_free(
        struct xfs_mount        *mp)
 {
+       /*
+        * Free the buftargs here because blkdev_put needs to be called outside
+        * of sb->s_umount, which is held around the call to ->put_super.
+        */
+       if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+               xfs_free_buftarg(mp->m_logdev_targp);
+       if (mp->m_rtdev_targp)
+               xfs_free_buftarg(mp->m_rtdev_targp);
+       if (mp->m_ddev_targp)
+               xfs_free_buftarg(mp->m_ddev_targp);
+
        kfree(mp->m_rtname);
        kfree(mp->m_logname);
        kmem_free(mp);
@@ -1133,10 +1158,6 @@ xfs_fs_put_super(
 {
        struct xfs_mount        *mp = XFS_M(sb);
 
-       /* if ->fill_super failed, we have no mount to tear down */
-       if (!sb->s_fs_info)
-               return;
-
        xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
        xfs_filestream_unmount(mp);
        xfs_unmountfs(mp);
@@ -1147,10 +1168,7 @@ xfs_fs_put_super(
        xfs_inodegc_free_percpu(mp);
        xfs_destroy_percpu_counters(mp);
        xfs_destroy_mount_workqueues(mp);
-       xfs_close_devices(mp);
-
-       sb->s_fs_info = NULL;
-       xfs_mount_free(mp);
+       xfs_shutdown_devices(mp);
 }
 
 static long
@@ -1492,7 +1510,7 @@ xfs_fs_fill_super(
 
        error = xfs_fs_validate_params(mp);
        if (error)
-               goto out_free_names;
+               return error;
 
        sb_min_blocksize(sb, BBSIZE);
        sb->s_xattr = xfs_xattr_handlers;
@@ -1519,11 +1537,11 @@ xfs_fs_fill_super(
 
        error = xfs_open_devices(mp);
        if (error)
-               goto out_free_names;
+               return error;
 
        error = xfs_init_mount_workqueues(mp);
        if (error)
-               goto out_close_devices;
+               goto out_shutdown_devices;
 
        error = xfs_init_percpu_counters(mp);
        if (error)
@@ -1737,11 +1755,8 @@ xfs_fs_fill_super(
        xfs_destroy_percpu_counters(mp);
  out_destroy_workqueues:
        xfs_destroy_mount_workqueues(mp);
- out_close_devices:
-       xfs_close_devices(mp);
- out_free_names:
-       sb->s_fs_info = NULL;
-       xfs_mount_free(mp);
+ out_shutdown_devices:
+       xfs_shutdown_devices(mp);
        return error;
 
  out_unmount:
@@ -1934,7 +1949,8 @@ xfs_fs_reconfigure(
        return 0;
 }
 
-static void xfs_fs_free(
+static void
+xfs_fs_free(
        struct fs_context       *fc)
 {
        struct xfs_mount        *mp = fc->s_fs_info;
@@ -2003,13 +2019,21 @@ static int xfs_init_fs_context(
        return 0;
 }
 
+static void
+xfs_kill_sb(
+       struct super_block              *sb)
+{
+       kill_block_super(sb);
+       xfs_mount_free(XFS_M(sb));
+}
+
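
The resulting unmount order, as implied by the comments in this patch
(the VFS half lives in fs/super.c; treat the exact sequencing as a
sketch):

	/*
	 * deactivate_locked_super()
	 *   -> xfs_kill_sb()
	 *        -> kill_block_super()	tears down the super (->put_super
	 *				runs under s_umount) and closes
	 *				the main bdev
	 *        -> xfs_mount_free()	xfs_free_buftarg() -> blkdev_put()
	 *				for the log/rt bdevs, now safely
	 *				outside s_umount
	 */
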
 static struct file_system_type xfs_fs_type = {
        .owner                  = THIS_MODULE,
        .name                   = "xfs",
        .init_fs_context        = xfs_init_fs_context,
        .parameters             = xfs_fs_parameters,
-       .kill_sb                = kill_block_super,
-       .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+       .kill_sb                = xfs_kill_sb,
+       .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
 };
 MODULE_ALIAS_FS("xfs");
 
index 92c9aaa..b2c9b35 100644 (file)
@@ -175,7 +175,7 @@ const struct address_space_operations zonefs_file_aops = {
        .read_folio             = zonefs_read_folio,
        .readahead              = zonefs_readahead,
        .writepages             = zonefs_writepages,
-       .dirty_folio            = filemap_dirty_folio,
+       .dirty_folio            = iomap_dirty_folio,
        .release_folio          = iomap_release_folio,
        .invalidate_folio       = iomap_invalidate_folio,
        .migrate_folio          = filemap_migrate_folio,
@@ -341,77 +341,6 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
        return generic_file_llseek_size(file, offset, whence, isize, isize);
 }
 
-struct zonefs_zone_append_bio {
-       /* The target inode of the BIO */
-       struct inode *inode;
-
-       /* For sync writes, the target append write offset */
-       u64 append_offset;
-
-       /*
-        * This member must come last, bio_alloc_bioset will allocate enough
-        * bytes for entire zonefs_bio but relies on bio being last.
-        */
-       struct bio bio;
-};
-
-static inline struct zonefs_zone_append_bio *
-zonefs_zone_append_bio(struct bio *bio)
-{
-       return container_of(bio, struct zonefs_zone_append_bio, bio);
-}
-
-static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio)
-{
-       struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
-       struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode);
-       sector_t za_sector;
-
-       if (bio->bi_status != BLK_STS_OK)
-               goto bio_end;
-
-       /*
-        * If the file zone was written underneath the file system, the zone
-        * append operation can still succedd (if the zone is not full) but
-        * the write append location will not be where we expect it to be.
-        * Check that we wrote where we intended to, that is, at z->z_wpoffset.
-        */
-       za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT);
-       if (bio->bi_iter.bi_sector != za_sector) {
-               zonefs_warn(za_bio->inode->i_sb,
-                           "Invalid write sector %llu for zone at %llu\n",
-                           bio->bi_iter.bi_sector, z->z_sector);
-               bio->bi_status = BLK_STS_IOERR;
-       }
-
-bio_end:
-       iomap_dio_bio_end_io(bio);
-}
-
-static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter,
-                                                 struct bio *bio,
-                                                 loff_t file_offset)
-{
-       struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
-       struct inode *inode = iter->inode;
-       struct zonefs_zone *z = zonefs_inode_zone(inode);
-
-       /*
-        * Issue a zone append BIO to process sync dio writes. The append
-        * file offset is saved to check the zone append write location
-        * on completion of the BIO.
-        */
-       za_bio->inode = inode;
-       za_bio->append_offset = file_offset;
-
-       bio->bi_opf &= ~REQ_OP_WRITE;
-       bio->bi_opf |= REQ_OP_ZONE_APPEND;
-       bio->bi_iter.bi_sector = z->z_sector;
-       bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io;
-
-       submit_bio(bio);
-}
-
 static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
                                        int error, unsigned int flags)
 {
@@ -442,14 +371,6 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
        return 0;
 }
 
-static struct bio_set zonefs_zone_append_bio_set;
-
-static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
-       .submit_io      = zonefs_file_zone_append_dio_submit_io,
-       .end_io         = zonefs_file_write_dio_end_io,
-       .bio_set        = &zonefs_zone_append_bio_set,
-};
-
 static const struct iomap_dio_ops zonefs_write_dio_ops = {
        .end_io         = zonefs_file_write_dio_end_io,
 };
@@ -533,9 +454,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
        struct zonefs_inode_info *zi = ZONEFS_I(inode);
        struct zonefs_zone *z = zonefs_inode_zone(inode);
        struct super_block *sb = inode->i_sb;
-       const struct iomap_dio_ops *dio_ops;
-       bool sync = is_sync_kiocb(iocb);
-       bool append = false;
        ssize_t ret, count;
 
        /*
@@ -543,7 +461,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
         * as this can cause write reordering (e.g. the first aio gets EAGAIN
         * on the inode lock but the second goes through but is now unaligned).
         */
-       if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
+       if (zonefs_zone_is_seq(z) && !is_sync_kiocb(iocb) &&
+           (iocb->ki_flags & IOCB_NOWAIT))
                return -EOPNOTSUPP;
 
        if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -573,18 +492,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
                        goto inode_unlock;
                }
                mutex_unlock(&zi->i_truncate_mutex);
-               append = sync;
-       }
-
-       if (append) {
-               unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev);
-
-               max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize);
-               iov_iter_truncate(from, max);
-
-               dio_ops = &zonefs_zone_append_dio_ops;
-       } else {
-               dio_ops = &zonefs_write_dio_ops;
        }
 
        /*
@@ -593,7 +500,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
         * the user can make sense of the error.
         */
        ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
-                          dio_ops, 0, NULL, 0);
+                          &zonefs_write_dio_ops, 0, NULL, 0);
        if (ret == -ENOTBLK)
                ret = -EBUSY;
 
@@ -938,15 +845,3 @@ const struct file_operations zonefs_file_operations = {
        .splice_write   = iter_file_splice_write,
        .iopoll         = iocb_bio_iopoll,
 };
-
-int zonefs_file_bioset_init(void)
-{
-       return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE,
-                          offsetof(struct zonefs_zone_append_bio, bio),
-                          BIOSET_NEED_BVECS);
-}
-
-void zonefs_file_bioset_exit(void)
-{
-       bioset_exit(&zonefs_zone_append_bio_set);
-}
index bbe44a2..9d1a980 100644 (file)
@@ -658,7 +658,8 @@ static struct inode *zonefs_get_file_inode(struct inode *dir,
 
        inode->i_ino = ino;
        inode->i_mode = z->z_mode;
-       inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+       inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+                                                               inode_get_ctime(dir));
        inode->i_uid = z->z_uid;
        inode->i_gid = z->z_gid;
        inode->i_size = z->z_wpoffset;
@@ -694,7 +695,8 @@ static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
        inode->i_ino = ino;
        inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
        inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
-       inode->i_ctime = inode->i_mtime = inode->i_atime = root->i_ctime;
+       inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+                                                               inode_get_ctime(root));
        inode->i_private = &sbi->s_zgroup[ztype];
        set_nlink(inode, 2);
 
@@ -1317,7 +1319,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
 
        inode->i_ino = bdev_nr_zones(sb->s_bdev);
        inode->i_mode = S_IFDIR | 0555;
-       inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
        inode->i_op = &zonefs_dir_inode_operations;
        inode->i_fop = &zonefs_dir_operations;
        inode->i_size = 2;
@@ -1412,13 +1414,9 @@ static int __init zonefs_init(void)
 
        BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
 
-       ret = zonefs_file_bioset_init();
-       if (ret)
-               return ret;
-
        ret = zonefs_init_inodecache();
        if (ret)
-               goto destroy_bioset;
+               return ret;
 
        ret = zonefs_sysfs_init();
        if (ret)
@@ -1434,8 +1432,6 @@ sysfs_exit:
        zonefs_sysfs_exit();
 destroy_inodecache:
        zonefs_destroy_inodecache();
-destroy_bioset:
-       zonefs_file_bioset_exit();
 
        return ret;
 }
@@ -1445,7 +1441,6 @@ static void __exit zonefs_exit(void)
        unregister_filesystem(&zonefs_type);
        zonefs_sysfs_exit();
        zonefs_destroy_inodecache();
-       zonefs_file_bioset_exit();
 }
 
 MODULE_AUTHOR("Damien Le Moal");
index f663b8e..8175652 100644 (file)
@@ -279,8 +279,6 @@ extern const struct file_operations zonefs_dir_operations;
 extern const struct address_space_operations zonefs_file_aops;
 extern const struct file_operations zonefs_file_operations;
 int zonefs_file_truncate(struct inode *inode, loff_t isize);
-int zonefs_file_bioset_init(void);
-void zonefs_file_bioset_exit(void);
 
 /* In sysfs.c */
 int zonefs_sysfs_register(struct super_block *sb);
index d71291f..76aa6aa 100644 (file)
@@ -22,6 +22,7 @@
 #define METHOD_NAME__DDN        "_DDN"
 #define METHOD_NAME__DIS        "_DIS"
 #define METHOD_NAME__DMA        "_DMA"
+#define METHOD_NAME__EVT        "_EVT"
 #define METHOD_NAME__HID        "_HID"
 #define METHOD_NAME__INI        "_INI"
 #define METHOD_NAME__PLD        "_PLD"
index c941d99..2546850 100644 (file)
@@ -515,6 +515,12 @@ void acpi_bus_private_data_handler(acpi_handle, void *);
 int acpi_bus_get_private_data(acpi_handle, void **);
 int acpi_bus_attach_private_data(acpi_handle, void *);
 void acpi_bus_detach_private_data(acpi_handle);
+int acpi_dev_install_notify_handler(struct acpi_device *adev,
+                                   u32 handler_type,
+                                   acpi_notify_handler handler);
+void acpi_dev_remove_notify_handler(struct acpi_device *adev,
+                                   u32 handler_type,
+                                   acpi_notify_handler handler);
 extern int acpi_notifier_call_chain(struct acpi_device *, u32, u32);
 extern int register_acpi_notifier(struct notifier_block *);
 extern int unregister_acpi_notifier(struct notifier_block *);
@@ -563,8 +569,6 @@ int acpi_match_device_ids(struct acpi_device *device,
                          const struct acpi_device_id *ids);
 void acpi_set_modalias(struct acpi_device *adev, const char *default_id,
                       char *modalias, size_t len);
-int acpi_create_dir(struct acpi_device *);
-void acpi_remove_dir(struct acpi_device *);
 
 static inline bool acpi_device_enumerated(struct acpi_device *adev)
 {
@@ -645,6 +649,8 @@ int acpi_disable_wakeup_device_power(struct acpi_device *dev);
 #ifdef CONFIG_X86
 bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status);
 bool acpi_quirk_skip_acpi_ac_and_battery(void);
+int acpi_install_cmos_rtc_space_handler(acpi_handle handle);
+void acpi_remove_cmos_rtc_space_handler(acpi_handle handle);
 #else
 static inline bool acpi_device_override_status(struct acpi_device *adev,
                                               unsigned long long *status)
@@ -655,6 +661,13 @@ static inline bool acpi_quirk_skip_acpi_ac_and_battery(void)
 {
        return false;
 }
+static inline int acpi_install_cmos_rtc_space_handler(acpi_handle handle)
+{
+       return 1;
+}
+static inline void acpi_remove_cmos_rtc_space_handler(acpi_handle handle)
+{
+}
 #endif
 
 #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
index 9ffdc04..3d90716 100644 (file)
@@ -12,7 +12,7 @@
 
 /* Current ACPICA subsystem version in YYYYMMDD format */
 
-#define ACPI_CA_VERSION                 0x20230331
+#define ACPI_CA_VERSION                 0x20230628
 
 #include <acpi/acconfig.h>
 #include <acpi/actypes.h>
@@ -970,8 +970,6 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
                                               void **data,
                                               void (*callback)(void *)))
 
-void acpi_run_debugger(char *batch_buffer);
-
 void acpi_set_debugger_thread_id(acpi_thread_id thread_id);
 
 #endif                         /* __ACXFACE_H__ */
index 58b0490..8d5572a 100644 (file)
@@ -402,7 +402,7 @@ struct acpi_cdat_dsmas {
 
 /* Flags for subtable above */
 
-#define ACPI_CEDT_DSMAS_NON_VOLATILE        (1 << 2)
+#define ACPI_CDAT_DSMAS_NON_VOLATILE        (1 << 2)
 
 /* Subtable 1: Device scoped Latency and Bandwidth Information Structure (DSLBIS) */
 
index 0029336..3751ae6 100644 (file)
@@ -893,7 +893,10 @@ enum acpi_madt_type {
        ACPI_MADT_TYPE_BIO_PIC = 22,
        ACPI_MADT_TYPE_LPC_PIC = 23,
        ACPI_MADT_TYPE_RINTC = 24,
-       ACPI_MADT_TYPE_RESERVED = 25,   /* 25 to 0x7F are reserved */
+       ACPI_MADT_TYPE_IMSIC = 25,
+       ACPI_MADT_TYPE_APLIC = 26,
+       ACPI_MADT_TYPE_PLIC = 27,
+       ACPI_MADT_TYPE_RESERVED = 28,   /* 28 to 0x7F are reserved */
        ACPI_MADT_TYPE_OEM_RESERVED = 0x80      /* 0x80 to 0xFF are reserved for OEM use */
 };
 
@@ -1261,6 +1264,9 @@ struct acpi_madt_rintc {
        u32 flags;
        u64 hart_id;
        u32 uid;                /* ACPI processor UID */
+       u32 ext_intc_id;        /* External INTC Id */
+       u64 imsic_addr;         /* IMSIC base address */
+       u32 imsic_size;         /* IMSIC size */
 };
 
 /* Values for RISC-V INTC Version field above */
@@ -1271,6 +1277,48 @@ enum acpi_madt_rintc_version {
        ACPI_MADT_RINTC_VERSION_RESERVED = 2    /* 2 and greater are reserved */
 };
 
+/* 25: RISC-V IMSIC */
+struct acpi_madt_imsic {
+       struct acpi_subtable_header header;
+       u8 version;
+       u8 reserved;
+       u32 flags;
+       u16 num_ids;
+       u16 num_guest_ids;
+       u8 guest_index_bits;
+       u8 hart_index_bits;
+       u8 group_index_bits;
+       u8 group_index_shift;
+};
+
+/* 26: RISC-V APLIC */
+struct acpi_madt_aplic {
+       struct acpi_subtable_header header;
+       u8 version;
+       u8 id;
+       u32 flags;
+       u8 hw_id[8];
+       u16 num_idcs;
+       u16 num_sources;
+       u32 gsi_base;
+       u64 base_addr;
+       u32 size;
+};
+
+/* 27: RISC-V PLIC */
+struct acpi_madt_plic {
+       struct acpi_subtable_header header;
+       u8 version;
+       u8 id;
+       u8 hw_id[8];
+       u16 num_irqs;
+       u16 max_prio;
+       u32 flags;
+       u32 size;
+       u64 base_addr;
+       u32 gsi_base;
+};
+
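
Consumers walk these subtables with the existing MADT helper.  A hedged
sketch of enumerating IMSICs at early boot (the handler name is
invented; acpi_table_parse_madt() is the standard entry point):

	static int __init example_parse_imsic(union acpi_subtable_headers *header,
					      const unsigned long end)
	{
		struct acpi_madt_imsic *imsic = (struct acpi_madt_imsic *)header;

		pr_info("IMSIC: %u ids, %u guest ids\n",
			imsic->num_ids, imsic->num_guest_ids);
		return 0;
	}

	/* e.g.: acpi_table_parse_madt(ACPI_MADT_TYPE_IMSIC,
	 *	  example_parse_imsic, 0); */
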
 /* 80: OEM data */
 
 struct acpi_madt_oem_data {
@@ -2730,12 +2778,15 @@ enum acpi_rgrt_image_type {
 
 struct acpi_table_rhct {
        struct acpi_table_header header;        /* Common ACPI table header */
-       u32 reserved;
+       u32 flags;              /* RHCT flags */
        u64 time_base_freq;
        u32 node_count;
        u32 node_offset;
 };
 
+/* RHCT Flags */
+
+#define ACPI_RHCT_TIMER_CANNOT_WAKEUP_CPU       (1)
 /*
  * RHCT subtables
  */
@@ -2749,6 +2800,9 @@ struct acpi_rhct_node_header {
 
 enum acpi_rhct_node_type {
        ACPI_RHCT_NODE_TYPE_ISA_STRING = 0x0000,
+       ACPI_RHCT_NODE_TYPE_CMO = 0x0001,
+       ACPI_RHCT_NODE_TYPE_MMU = 0x0002,
+       ACPI_RHCT_NODE_TYPE_RESERVED = 0x0003,
        ACPI_RHCT_NODE_TYPE_HART_INFO = 0xFFFF,
 };
 
@@ -2762,6 +2816,24 @@ struct acpi_rhct_isa_string {
        char isa[];
 };
 
+struct acpi_rhct_cmo_node {
+       u8 reserved;            /* Must be zero */
+       u8 cbom_size;           /* CBOM size as a power of 2 */
+       u8 cbop_size;           /* CBOP size as a power of 2 */
+       u8 cboz_size;           /* CBOZ size as a power of 2 */
+};
+
+struct acpi_rhct_mmu_node {
+       u8 reserved;            /* Must be zero */
+       u8 mmu_type;            /* Virtual Address Scheme */
+};
+
+enum acpi_rhct_mmu_type {
+       ACPI_RHCT_MMU_TYPE_SV39 = 0,
+       ACPI_RHCT_MMU_TYPE_SV48 = 1,
+       ACPI_RHCT_MMU_TYPE_SV57 = 2
+};
+
 /* Hart Info node structure */
 struct acpi_rhct_hart_info {
        u16 num_offsets;
index 000764a..c080d57 100644 (file)
@@ -279,12 +279,14 @@ struct acpi_srat_gic_its_affinity {
  * 6: ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY
  */
 
+#define ACPI_SRAT_DEVICE_HANDLE_SIZE   16
+
 struct acpi_srat_generic_affinity {
        struct acpi_subtable_header header;
        u8 reserved;
        u8 device_handle_type;
        u32 proximity_domain;
-       u8 device_handle[16];
+       u8 device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
        u32 flags;
        u32 reserved1;
 };
diff --git a/include/acpi/pdc_intel.h b/include/acpi/pdc_intel.h
deleted file mode 100644 (file)
index 967c552..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/* _PDC bit definition for Intel processors */
-
-#ifndef __PDC_INTEL_H__
-#define __PDC_INTEL_H__
-
-#define ACPI_PDC_P_FFH                 (0x0001)
-#define ACPI_PDC_C_C1_HALT             (0x0002)
-#define ACPI_PDC_T_FFH                 (0x0004)
-#define ACPI_PDC_SMP_C1PT              (0x0008)
-#define ACPI_PDC_SMP_C2C3              (0x0010)
-#define ACPI_PDC_SMP_P_SWCOORD         (0x0020)
-#define ACPI_PDC_SMP_C_SWCOORD         (0x0040)
-#define ACPI_PDC_SMP_T_SWCOORD         (0x0080)
-#define ACPI_PDC_C_C1_FFH              (0x0100)
-#define ACPI_PDC_C_C2C3_FFH            (0x0200)
-#define ACPI_PDC_SMP_P_HWCOORD         (0x0800)
-
-#define ACPI_PDC_EST_CAPABILITY_SMP    (ACPI_PDC_SMP_C1PT | \
-                                        ACPI_PDC_C_C1_HALT | \
-                                        ACPI_PDC_P_FFH)
-
-#define ACPI_PDC_EST_CAPABILITY_SWSMP  (ACPI_PDC_SMP_C1PT | \
-                                        ACPI_PDC_C_C1_HALT | \
-                                        ACPI_PDC_SMP_P_SWCOORD | \
-                                        ACPI_PDC_SMP_P_HWCOORD | \
-                                        ACPI_PDC_P_FFH)
-
-#define ACPI_PDC_C_CAPABILITY_SMP      (ACPI_PDC_SMP_C2C3  | \
-                                        ACPI_PDC_SMP_C1PT  | \
-                                        ACPI_PDC_C_C1_HALT | \
-                                        ACPI_PDC_C_C1_FFH  | \
-                                        ACPI_PDC_C_C2C3_FFH)
-
-#endif                         /* __PDC_INTEL_H__ */
index 1ca450e..565341c 100644 (file)
 #ifdef ACPI_USE_STANDARD_HEADERS
 #include <stddef.h>
 #include <unistd.h>
+#include <stdint.h>
 
 #define ACPI_OFFSET(d, f)   offsetof(d, f)
 #endif
index 2f0d30c..703db4d 100644 (file)
@@ -10,9 +10,6 @@
 #ifndef __ACZEPHYR_H__
 #define __ACZEPHYR_H__
 
-#define SEEK_SET FS_SEEK_SET
-#define SEEK_END FS_SEEK_END
-
 #define ACPI_MACHINE_WIDTH      64
 
 #define ACPI_NO_ERROR_MESSAGES
diff --git a/include/acpi/proc_cap_intel.h b/include/acpi/proc_cap_intel.h
new file mode 100644 (file)
index 0000000..ddcdc41
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Vendor specific processor capabilities bit definition
+ * for Intel processors. Those bits are used to convey OSPM
+ * power management capabilities to the platform.
+ */
+
+#ifndef __PROC_CAP_INTEL_H__
+#define __PROC_CAP_INTEL_H__
+
+#define ACPI_PROC_CAP_P_FFH                    (0x0001)
+#define ACPI_PROC_CAP_C_C1_HALT                        (0x0002)
+#define ACPI_PROC_CAP_T_FFH                    (0x0004)
+#define ACPI_PROC_CAP_SMP_C1PT                 (0x0008)
+#define ACPI_PROC_CAP_SMP_C2C3                 (0x0010)
+#define ACPI_PROC_CAP_SMP_P_SWCOORD            (0x0020)
+#define ACPI_PROC_CAP_SMP_C_SWCOORD            (0x0040)
+#define ACPI_PROC_CAP_SMP_T_SWCOORD            (0x0080)
+#define ACPI_PROC_CAP_C_C1_FFH                 (0x0100)
+#define ACPI_PROC_CAP_C_C2C3_FFH               (0x0200)
+#define ACPI_PROC_CAP_SMP_P_HWCOORD            (0x0800)
+#define ACPI_PROC_CAP_COLLAB_PROC_PERF         (0x1000)
+
+#define ACPI_PROC_CAP_EST_CAPABILITY_SMP       (ACPI_PROC_CAP_SMP_C1PT | \
+                                                ACPI_PROC_CAP_C_C1_HALT | \
+                                                ACPI_PROC_CAP_P_FFH)
+
+#define ACPI_PROC_CAP_EST_CAPABILITY_SWSMP     (ACPI_PROC_CAP_SMP_C1PT | \
+                                                ACPI_PROC_CAP_C_C1_HALT | \
+                                                ACPI_PROC_CAP_SMP_P_SWCOORD | \
+                                                ACPI_PROC_CAP_SMP_P_HWCOORD | \
+                                                ACPI_PROC_CAP_P_FFH)
+
+#define ACPI_PROC_CAP_C_CAPABILITY_SMP         (ACPI_PROC_CAP_SMP_C2C3  | \
+                                                ACPI_PROC_CAP_SMP_C1PT  | \
+                                                ACPI_PROC_CAP_C_C1_HALT | \
+                                                ACPI_PROC_CAP_C_C1_FFH  | \
+                                                ACPI_PROC_CAP_C_C2C3_FFH)
+
+#endif /* __PROC_CAP_INTEL_H__ */
index 402a8c1..a8f4b65 100644 (file)
@@ -190,7 +190,7 @@ int hv_common_cpu_die(unsigned int cpu);
 
 void *hv_alloc_hyperv_page(void);
 void *hv_alloc_hyperv_zeroed_page(void);
-void hv_free_hyperv_page(unsigned long addr);
+void hv_free_hyperv_page(void *addr);
 
 /**
  * hv_cpu_number_to_vp_number() - Map CPU to VP.
index 0587354..9c59409 100644 (file)
                *(.text.unlikely .text.unlikely.*)                      \
                *(.text.unknown .text.unknown.*)                        \
                NOINSTR_TEXT                                            \
-               *(.text..refcount)                                      \
                *(.ref.text)                                            \
                *(.text.asan.* .text.tsan.*)                            \
        MEM_KEEP(init.text*)                                            \
index 20c93f0..95a1d21 100644 (file)
@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
        return (mask >> 8) ? byte : byte + 1;
 }
 
-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
 {
        unsigned long rhs = val | c->low_bits;
        *data = rhs;
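
The return type widens from bool to unsigned long to match the generic
(big-endian) variant, which hands back the nonzero mask word.  The
canonical caller pattern, sketched after the way lib/string.c-style
code uses this API (the function name is invented):

	static inline long example_first_zero_byte(const char *s)
	{
		const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
		unsigned long val, data;

		val = read_word_at_a_time(s);
		if (!has_zero(val, &data, &constants))
			return -1;	/* no NUL byte in this word */

		data = prep_zero_mask(val, data, &constants);
		data = create_zero_mask(data);
		return find_zero(data);	/* byte index of the NUL */
	}
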
index 02f2ac4..e69cece 100644 (file)
@@ -1537,7 +1537,7 @@ enum drm_dp_phy {
 
 #define DP_BRANCH_OUI_HEADER_SIZE      0xc
 #define DP_RECEIVER_CAP_SIZE           0xf
-#define DP_DSC_RECEIVER_CAP_SIZE        0xf
+#define DP_DSC_RECEIVER_CAP_SIZE        0x10 /* DSC Capabilities 0x60 through 0x6F */
 #define EDP_PSR_RECEIVER_CAP_SIZE      2
 #define EDP_DISPLAY_CTL_CAP_SIZE       3
 #define DP_LTTPR_COMMON_CAP_SIZE       8
index 169755d..48e93f9 100644 (file)
@@ -61,15 +61,9 @@ struct std_timing {
        u8 vfreq_aspect;
 } __attribute__((packed));
 
-#define DRM_EDID_PT_SYNC_MASK              (3 << 3)
-# define DRM_EDID_PT_ANALOG_CSYNC          (0 << 3)
-# define DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC  (1 << 3)
-# define DRM_EDID_PT_DIGITAL_CSYNC         (2 << 3)
-#  define DRM_EDID_PT_CSYNC_ON_RGB         (1 << 1) /* analog csync only */
-#  define DRM_EDID_PT_CSYNC_SERRATE        (1 << 2)
-# define DRM_EDID_PT_DIGITAL_SEPARATE_SYNC (3 << 3)
-#  define DRM_EDID_PT_HSYNC_POSITIVE       (1 << 1) /* also digital csync */
-#  define DRM_EDID_PT_VSYNC_POSITIVE       (1 << 2)
+#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1)
+#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2)
+#define DRM_EDID_PT_SEPARATE_SYNC  (3 << 3)
 #define DRM_EDID_PT_STEREO         (1 << 5)
 #define DRM_EDID_PT_INTERLACED     (1 << 7)
 
index 4863b0f..375737f 100644 (file)
@@ -368,11 +368,6 @@ static inline void drm_fb_helper_deferred_io(struct fb_info *info,
 {
 }
 
-static inline int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
-{
-       return -ENODEV;
-}
-
 static inline void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper,
                                             bool suspend)
 {
index 4977e0a..fad3c40 100644 (file)
@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector);
 
 void drm_kms_helper_poll_disable(struct drm_device *dev);
 void drm_kms_helper_poll_enable(struct drm_device *dev);
+void drm_kms_helper_poll_reschedule(struct drm_device *dev);
 bool drm_kms_helper_is_poll_worker(void);
 
 enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc,
index e95b483..f9544d9 100644 (file)
@@ -583,15 +583,14 @@ void drm_sched_entity_set_priority(struct drm_sched_entity *entity,
 bool drm_sched_entity_is_ready(struct drm_sched_entity *entity);
 int drm_sched_entity_error(struct drm_sched_entity *entity);
 
-void drm_sched_fence_set_parent(struct drm_sched_fence *s_fence,
-                               struct dma_fence *fence);
 struct drm_sched_fence *drm_sched_fence_alloc(
        struct drm_sched_entity *s_entity, void *owner);
 void drm_sched_fence_init(struct drm_sched_fence *fence,
                          struct drm_sched_entity *entity);
 void drm_sched_fence_free(struct drm_sched_fence *fence);
 
-void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
+void drm_sched_fence_scheduled(struct drm_sched_fence *fence,
+                              struct dma_fence *parent);
 void drm_sched_fence_finished(struct drm_sched_fence *fence, int result);
 
 unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
index 402b545..5b27f94 100644 (file)
@@ -431,7 +431,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
 
 int vgic_v4_load(struct kvm_vcpu *vcpu);
 void vgic_v4_commit(struct kvm_vcpu *vcpu);
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
+int vgic_v4_put(struct kvm_vcpu *vcpu);
 
 /* CPU HP callbacks */
 void kvm_vgic_cpu_up(void);
index 641dc48..a73246c 100644 (file)
@@ -477,8 +477,6 @@ static inline int acpi_get_node(acpi_handle handle)
        return 0;
 }
 #endif
-extern int acpi_paddr_to_node(u64 start_addr, u64 size);
-
 extern int pnpacpi_disabled;
 
 #define PXM_INVAL      (-1)
@@ -1100,7 +1098,7 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state,
 
 acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state,
                                           u32 val_a, u32 val_b);
-#ifdef CONFIG_X86
+#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86)
 struct acpi_s2idle_dev_ops {
        struct list_head list_node;
        void (*prepare)(void);
@@ -1109,7 +1107,13 @@ struct acpi_s2idle_dev_ops {
 };
 int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg);
 void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg);
-#endif /* CONFIG_X86 */
+int acpi_get_lps0_constraint(struct acpi_device *adev);
+#else /* CONFIG_SUSPEND && CONFIG_X86 */
+static inline int acpi_get_lps0_constraint(struct acpi_device *adev)
+{
+       return ACPI_STATE_UNKNOWN;
+}
+#endif /* CONFIG_SUSPEND && CONFIG_X86 */
 #ifndef CONFIG_IA64
 void arch_reserve_mem_area(acpi_physical_address addr, size_t size);
 #else
index ee7cb6a..1cb6559 100644 (file)
@@ -21,6 +21,7 @@
  */
 #define IORT_SMMU_V3_PMCG_GENERIC        0x00000000 /* Generic SMMUv3 PMCG */
 #define IORT_SMMU_V3_PMCG_HISI_HIP08     0x00000001 /* HiSilicon HIP08 PMCG */
+#define IORT_SMMU_V3_PMCG_HISI_HIP09     0x00000002 /* HiSilicon HIP09 PMCG */
 
 int iort_register_domain_token(int trans_id, phys_addr_t base,
                               struct fwnode_handle *fw_node);
index 14dc461..255701e 100644 (file)
@@ -47,10 +47,12 @@ int sdei_unregister_ghes(struct ghes *ghes);
 int sdei_mask_local_cpu(void);
 int sdei_unmask_local_cpu(void);
 void __init sdei_init(void);
+void sdei_handler_abort(void);
 #else
 static inline int sdei_mask_local_cpu(void) { return 0; }
 static inline int sdei_unmask_local_cpu(void) { return 0; }
 static inline void sdei_init(void) { }
+static inline void sdei_handler_abort(void) { }
 #endif /* CONFIG_ARM_SDE_INTERFACE */
 
 
index c4f5b52..11984ed 100644 (file)
@@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
 static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
 {
        bio->bi_opf |= REQ_POLLED;
-       if (!is_sync_kiocb(kiocb))
+       if (kiocb->ki_flags & IOCB_NOWAIT)
                bio->bi_opf |= REQ_NOWAIT;
 }
 
index e6802b6..90ab33c 100644 (file)
@@ -111,6 +111,7 @@ struct blk_crypto_profile {
         * keyslots while ensuring that they can't be changed concurrently.
         */
        struct rw_semaphore lock;
+       struct lock_class_key lockdep_key;
 
        /* List of idle slots, with least recently used slot at front */
        wait_queue_head_t idle_slots_wait_queue;
index 2b7fb8e..495ca19 100644 (file)
@@ -158,13 +158,13 @@ struct request {
 
        /*
         * The rb_node is only used inside the io scheduler, requests
-        * are pruned when moved to the dispatch queue. So let the
-        * completion_data share space with the rb_node.
+        * are pruned when moved to the dispatch queue. special_vec must
+        * only be used if RQF_SPECIAL_PAYLOAD is set, and those cannot be
+        * inserted into an IO scheduler.
         */
        union {
                struct rb_node rb_node; /* sort/lookup */
                struct bio_vec special_vec;
-               void *completion_data;
        };
 
        /*
@@ -397,8 +397,6 @@ struct blk_mq_hw_ctx {
         */
        struct blk_mq_tags      *sched_tags;
 
-       /** @queued: Number of queued requests. */
-       unsigned long           queued;
        /** @run: Number of dispatched requests. */
        unsigned long           run;
 
index 0bad62c..d5c5e59 100644 (file)
@@ -52,7 +52,6 @@ struct block_device {
        atomic_t                bd_openers;
        spinlock_t              bd_size_lock; /* for bd_inode->i_size updates */
        struct inode *          bd_inode;       /* will die */
-       struct super_block *    bd_super;
        void *                  bd_claiming;
        void *                  bd_holder;
        const struct blk_holder_ops *bd_holder_ops;
index ed44a99..83ce873 100644 (file)
@@ -750,7 +750,8 @@ static inline int bdev_read_only(struct block_device *bdev)
 }
 
 bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+void disk_force_media_change(struct gendisk *disk);
+void bdev_mark_dead(struct block_device *bdev, bool surprise);
 
 void add_disk_randomness(struct gendisk *disk) __latent_entropy;
 void rand_initialize_disk(struct gendisk *disk);
@@ -809,7 +810,6 @@ int __register_blkdev(unsigned int major, const char *name,
 void unregister_blkdev(unsigned int major, const char *name);
 
 bool disk_check_media_change(struct gendisk *disk);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
 void set_capacity(struct gendisk *disk, sector_t size);
 
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
@@ -969,7 +969,6 @@ struct blk_plug {
 
        bool multiple_queues;
        bool has_elevator;
-       bool nowait;
 
        struct list_head cb_list; /* md requires an unplug callback */
 };
@@ -1461,9 +1460,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
 #endif
 
 struct blk_holder_ops {
-       void (*mark_dead)(struct block_device *bdev);
+       void (*mark_dead)(struct block_device *bdev, bool surprise);
+
+       /*
+        * Sync the file system mounted on the block device.
+        */
+       void (*sync)(struct block_device *bdev);
 };
 
+extern const struct blk_holder_ops fs_holder_ops;
+
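A sketch of how a holder could wire up the new callbacks (the myfs_* names are hypothetical; fs_holder_ops above is the shared in-tree implementation for filesystems):

    static void myfs_mark_dead(struct block_device *bdev, bool surprise)
    {
            /* shut the fs down; @surprise means no orderly removal happened */
    }

    static void myfs_sync(struct block_device *bdev)
    {
            /* write back dirty state of the fs mounted on @bdev */
    }

    static const struct blk_holder_ops myfs_holder_ops = {
            .mark_dead      = myfs_mark_dead,
            .sync           = myfs_sync,
    };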
 /*
  * Return the correct open flags for blkdev_get_by_* for super block flags
  * as stored in sb->s_flags.
@@ -1522,8 +1528,6 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
 }
 #endif /* CONFIG_BLOCK */
 
-int fsync_bdev(struct block_device *bdev);
-
 int freeze_bdev(struct block_device *bdev);
 int thaw_bdev(struct block_device *bdev);
 
index 8a0d546..ae20dbb 100644 (file)
@@ -661,6 +661,8 @@ struct cgroup_subsys {
        void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
        int (*css_extra_stat_show)(struct seq_file *seq,
                                   struct cgroup_subsys_state *css);
+       int (*css_local_stat_show)(struct seq_file *seq,
+                                  struct cgroup_subsys_state *css);
 
        int (*can_attach)(struct cgroup_taskset *tset);
        void (*cancel_attach)(struct cgroup_taskset *tset);
index 1ef0133..06f1b29 100644 (file)
@@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
  */
 bool clk_is_match(const struct clk *p, const struct clk *q);
 
+/**
+ * clk_rate_exclusive_get - get exclusivity over the rate control of a
+ *                          producer
+ * @clk: clock source
+ *
+ * This function allows drivers to get exclusive control over the rate of a
+ * provider. It prevents any other consumer from executing, even indirectly,
+ * an operation which could alter the rate of the provider or cause glitches.
+ *
+ * If exclusivity is claimed more than once on a clock, even by the same driver,
+ * the rate effectively gets locked as exclusivity can't be preempted.
+ *
+ * Must not be called from within atomic context.
+ *
+ * Returns success (0) or negative errno.
+ */
+int clk_rate_exclusive_get(struct clk *clk);
+
+/**
+ * clk_rate_exclusive_put - release exclusivity over the rate control of a
+ *                          producer
+ * @clk: clock source
+ *
+ * This function allows drivers to release the exclusivity it previously got
+ * from clk_rate_exclusive_get()
+ *
+ * The caller must balance the number of clk_rate_exclusive_get() and
+ * clk_rate_exclusive_put() calls.
+ *
+ * Must not be called from within atomic context.
+ */
+void clk_rate_exclusive_put(struct clk *clk);
+
 #else
 
 static inline int clk_notifier_register(struct clk *clk,
@@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
        return p == q;
 }
 
+static inline int clk_rate_exclusive_get(struct clk *clk)
+{
+       return 0;
+}
+
+static inline void clk_rate_exclusive_put(struct clk *clk) {}
+
 #endif
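As a usage sketch (the clock handle and the 48 MHz rate are hypothetical), a consumer that must not have its rate changed underneath it pairs the calls like so:

    int err = clk_rate_exclusive_get(clk);
    if (err)
            return err;
    err = clk_set_rate(clk, 48000000);      /* rate is now locked for us */
    /* ... run the rate-sensitive transfer ... */
    clk_rate_exclusive_put(clk);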
 
 #ifdef CONFIG_HAVE_CLK_PREPARE
@@ -583,38 +623,6 @@ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
  */
 struct clk *devm_get_clk_from_child(struct device *dev,
                                    struct device_node *np, const char *con_id);
-/**
- * clk_rate_exclusive_get - get exclusivity over the rate control of a
- *                          producer
- * @clk: clock source
- *
- * This function allows drivers to get exclusive control over the rate of a
- * provider. It prevents any other consumer to execute, even indirectly,
- * opereation which could alter the rate of the provider or cause glitches
- *
- * If exlusivity is claimed more than once on clock, even by the same driver,
- * the rate effectively gets locked as exclusivity can't be preempted.
- *
- * Must not be called from within atomic context.
- *
- * Returns success (0) or negative errno.
- */
-int clk_rate_exclusive_get(struct clk *clk);
-
-/**
- * clk_rate_exclusive_put - release exclusivity over the rate control of a
- *                          producer
- * @clk: clock source
- *
- * This function allows drivers to release the exclusivity it previously got
- * from clk_rate_exclusive_get()
- *
- * The caller must balance the number of clk_rate_exclusive_get() and
- * clk_rate_exclusive_put() calls.
- *
- * Must not be called from within atomic context.
- */
-void clk_rate_exclusive_put(struct clk *clk);
 
 /**
  * clk_enable - inform the system when the clock source should be running.
@@ -974,14 +982,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
 
 static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
 
-
-static inline int clk_rate_exclusive_get(struct clk *clk)
-{
-       return 0;
-}
-
-static inline void clk_rate_exclusive_put(struct clk *clk) {}
-
 static inline int clk_enable(struct clk *clk)
 {
        return 0;
index 00efa35..2856662 100644 (file)
 #endif
 
 /*
+ * Optional: only supported since gcc >= 14
+ * Optional: only supported since clang >= 18
+ *
+ *   gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
+ * clang: https://reviews.llvm.org/D148381
+ */
+#if __has_attribute(__counted_by__)
+# define __counted_by(member)          __attribute__((__counted_by__(member)))
+#else
+# define __counted_by(member)
+#endif
+
+/*
  * Optional: not supported by gcc
  * Optional: only supported since clang >= 14.0
  *
 #endif
 
 /*
- * Optional: only supported since gcc >= 14
- * Optional: only supported since clang >= 17
- *
- *   gcc: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108896
- * clang: https://reviews.llvm.org/D148381
- */
-#if __has_attribute(__element_count__)
-# define __counted_by(member)          __attribute__((__element_count__(#member)))
-#else
-# define __counted_by(member)
-#endif
-
-/*
  * Optional: only supported since clang >= 14.0
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-error-function-attribute
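A minimal sketch of the new annotation (struct name hypothetical; the dm-verity-loadpin hunk further down shows an in-tree conversion):

    struct my_blob {
            unsigned int    len;                    /* element count of data[] */
            u8              data[] __counted_by(len);
    };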
index 547ea1f..c523c66 100644 (file)
@@ -106,6 +106,34 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { }
 #define __cold
 #endif
 
+/*
+ * On x86-64 and arm64 targets, __preserve_most changes the calling convention
+ * of a function to make the code in the caller as unintrusive as possible. This
+ * convention behaves identically to the C calling convention on how arguments
+ * and return values are passed, but uses a different set of caller- and callee-
+ * saved registers.
+ *
+ * The purpose is to alleviate the burden of saving and recovering a large
+ * register set before and after the call in the caller.  This is beneficial for
+ * rarely taken slow paths, such as error-reporting functions that may be called
+ * from hot paths.
+ *
+ * Note: This may conflict with instrumentation inserted on function entry which
+ * does not use __preserve_most or equivalent convention (if in assembly). Since
+ * function tracing assumes the normal C calling convention, where the attribute
+ * is supported, __preserve_most implies notrace.  It is recommended to restrict
+ * use of the attribute to functions that should or already disable tracing.
+ *
+ * Optional: not supported by gcc.
+ *
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#preserve-most
+ */
+#if __has_attribute(__preserve_most__) && (defined(CONFIG_X86_64) || defined(CONFIG_ARM64))
+# define __preserve_most notrace __attribute__((__preserve_most__))
+#else
+# define __preserve_most
+#endif
+
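A hedged sketch of the intended use, a cold reporting helper reachable from hot paths (name hypothetical); the list-hardening hunk near the end of this section applies the attribute via __list_valid_slowpath:

    /* Callers only have to preserve a minimal register set around this call. */
    void __preserve_most __cold report_bad_state(const void *obj);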
 /* Builtins */
 
 /*
index 62b32b1..fb29156 100644 (file)
@@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
 extern bool completion_done(struct completion *x);
 
 extern void complete(struct completion *);
+extern void complete_on_current_cpu(struct completion *x);
 extern void complete_all(struct completion *);
 
 #endif
index 6e6e57e..0abd60a 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/compiler.h>
 #include <linux/cpumask.h>
 #include <linux/cpuhotplug.h>
+#include <linux/cpu_smt.h>
 
 struct device;
 struct device_node;
@@ -70,6 +71,10 @@ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
                                        char *buf);
 extern ssize_t cpu_show_retbleed(struct device *dev,
                                 struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
+                                            struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_gds(struct device *dev,
+                           struct device_attribute *attr, char *buf);
 
 extern __printf(4, 5)
 struct device *cpu_device_create(struct device *parent, void *drvdata,
@@ -190,7 +195,6 @@ void arch_cpu_finalize_init(void);
 static inline void arch_cpu_finalize_init(void) { }
 #endif
 
-void cpu_set_state_online(int cpu);
 void play_idle_precise(u64 duration_ns, u64 latency_ns);
 
 static inline void play_idle(unsigned long duration_us)
@@ -204,30 +208,6 @@ void cpuhp_report_idle_dead(void);
 static inline void cpuhp_report_idle_dead(void) { }
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
-enum cpuhp_smt_control {
-       CPU_SMT_ENABLED,
-       CPU_SMT_DISABLED,
-       CPU_SMT_FORCE_DISABLED,
-       CPU_SMT_NOT_SUPPORTED,
-       CPU_SMT_NOT_IMPLEMENTED,
-};
-
-#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
-extern enum cpuhp_smt_control cpu_smt_control;
-extern void cpu_smt_disable(bool force);
-extern void cpu_smt_check_topology(void);
-extern bool cpu_smt_possible(void);
-extern int cpuhp_smt_enable(void);
-extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
-#else
-# define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)
-static inline void cpu_smt_disable(bool force) { }
-static inline void cpu_smt_check_topology(void) { }
-static inline bool cpu_smt_possible(void) { return false; }
-static inline int cpuhp_smt_enable(void) { return 0; }
-static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
-#endif
-
 extern bool cpu_mitigations_off(void);
 extern bool cpu_mitigations_auto_nosmt(void);
 
diff --git a/include/linux/cpu_smt.h b/include/linux/cpu_smt.h
new file mode 100644 (file)
index 0000000..0c16642
--- /dev/null
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_CPU_SMT_H_
+#define _LINUX_CPU_SMT_H_
+
+enum cpuhp_smt_control {
+       CPU_SMT_ENABLED,
+       CPU_SMT_DISABLED,
+       CPU_SMT_FORCE_DISABLED,
+       CPU_SMT_NOT_SUPPORTED,
+       CPU_SMT_NOT_IMPLEMENTED,
+};
+
+#if defined(CONFIG_SMP) && defined(CONFIG_HOTPLUG_SMT)
+extern enum cpuhp_smt_control cpu_smt_control;
+extern unsigned int cpu_smt_num_threads;
+extern void cpu_smt_disable(bool force);
+extern void cpu_smt_set_num_threads(unsigned int num_threads,
+                                   unsigned int max_threads);
+extern bool cpu_smt_possible(void);
+extern int cpuhp_smt_enable(void);
+extern int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval);
+#else
+# define cpu_smt_control               (CPU_SMT_NOT_IMPLEMENTED)
+# define cpu_smt_num_threads 1
+static inline void cpu_smt_disable(bool force) { }
+static inline void cpu_smt_set_num_threads(unsigned int num_threads,
+                                          unsigned int max_threads) { }
+static inline bool cpu_smt_possible(void) { return false; }
+static inline int cpuhp_smt_enable(void) { return 0; }
+static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; }
+#endif
+
+#endif /* _LINUX_CPU_SMT_H_ */
index 172ff51..43b363a 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/pm_qos.h>
 #include <linux/spinlock.h>
 #include <linux/sysfs.h>
+#include <linux/minmax.h>
 
 /*********************************************************************
  *                        CPUFREQ INTERFACE                          *
@@ -370,7 +371,7 @@ struct cpufreq_driver {
        int             (*target_intermediate)(struct cpufreq_policy *policy,
                                               unsigned int index);
 
-       /* should be defined, if possible */
+       /* should be defined, if possible; returns 0 on error */
        unsigned int    (*get)(unsigned int cpu);
 
        /* Called to update policy limits on firmware notifications. */
@@ -467,17 +468,8 @@ static inline void cpufreq_verify_within_limits(struct cpufreq_policy_data *poli
                                                unsigned int min,
                                                unsigned int max)
 {
-       if (policy->min < min)
-               policy->min = min;
-       if (policy->max < min)
-               policy->max = min;
-       if (policy->min > max)
-               policy->min = max;
-       if (policy->max > max)
-               policy->max = max;
-       if (policy->min > policy->max)
-               policy->min = policy->max;
-       return;
+       policy->max = clamp(policy->max, min, max);
+       policy->min = clamp(policy->min, min, policy->max);
 }
 
 static inline void
index 25b6e6e..06dda85 100644 (file)
@@ -48,7 +48,7 @@
  *    same section.
  *
  * If neither #1 nor #2 apply, please use the dynamic state space when
- * setting up a state by using CPUHP_PREPARE_DYN or CPUHP_PREPARE_ONLINE
+ * setting up a state by using CPUHP_BP_PREPARE_DYN or CPUHP_AP_ONLINE_DYN
  * for the @state argument of the setup function.
  *
  * See Documentation/core-api/cpu_hotplug.rst for further information and
index 0d2e2a3..f10fb87 100644 (file)
@@ -175,8 +175,8 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
 
 /**
  * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
+ * @srcp1: the first input
+ * @srcp2: the second input
  *
  * Returns >= nr_cpu_ids if no cpus set in both.  See also cpumask_next_and().
  */
@@ -1197,6 +1197,10 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
 /**
  * cpumap_print_list_to_buf  - copies the cpumask into the buffer as
  *     comma-separated list of cpus
+ * @buf: the buffer to copy into
+ * @mask: the cpumask to copy
+ * @off: offset in the formatted cpumask string from which to start copying into @buf
+ * @count: the maximum number of bytes to print
  *
 * Everything is the same as with the above cpumap_print_bitmask_to_buf()
 * except the print format.
index 9192986..ac86242 100644 (file)
@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
        if (!malloc_ptr)
                malloc_ptr = free_mem_ptr;
 
-       malloc_ptr = (malloc_ptr + 3) & ~3;     /* Align */
+       malloc_ptr = (malloc_ptr + 7) & ~7;     /* Align */
 
        p = (void *)malloc_ptr;
        malloc_ptr += size;
index 552b817..3ac6dba 100644 (file)
@@ -12,7 +12,7 @@ extern struct list_head dm_verity_loadpin_trusted_root_digests;
 struct dm_verity_loadpin_trusted_root_digest {
        struct list_head node;
        unsigned int len;
-       u8 data[];
+       u8 data[] __counted_by(len);
 };
 
 #if IS_ENABLED(CONFIG_SECURITY_LOADPIN_VERITY)
index d54b595..0d678e9 100644 (file)
@@ -606,7 +606,7 @@ static inline signed long dma_fence_wait(struct dma_fence *fence, bool intr)
 void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline);
 
 struct dma_fence *dma_fence_get_stub(void);
-struct dma_fence *dma_fence_allocate_private_stub(void);
+struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp);
 u64 dma_fence_context_alloc(unsigned num);
 
 extern const struct dma_fence_ops dma_fence_array_ops;
index b1d26f9..9f183a6 100644 (file)
@@ -30,7 +30,7 @@ struct dnotify_struct {
                            FS_MOVED_FROM | FS_MOVED_TO)
 
 extern void dnotify_flush(struct file *, fl_owner_t);
-extern int fcntl_dirnotify(int, struct file *, unsigned long);
+extern int fcntl_dirnotify(int, struct file *, unsigned int);
 
 #else
 
@@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id)
 {
 }
 
-static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
 {
        return -EINVAL;
 }
index ab088c6..5a1e39d 100644 (file)
@@ -726,7 +726,6 @@ static inline efi_status_t efi_query_variable_store(u32 attributes,
        return EFI_SUCCESS;
 }
 #endif
-extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 
 extern int __init __efi_memmap_init(struct efi_memory_map_data *data);
 extern int __init efi_memmap_init_early(struct efi_memory_map_data *data);
@@ -1130,7 +1129,7 @@ extern bool efi_runtime_disabled(void);
 static inline bool efi_runtime_disabled(void) { return true; }
 #endif
 
-extern void efi_call_virt_check_flags(unsigned long flags, const char *call);
+extern void efi_call_virt_check_flags(unsigned long flags, const void *caller);
 extern unsigned long efi_call_virt_save_flags(void);
 
 enum efi_secureboot_mode {
@@ -1171,8 +1170,7 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 #define arch_efi_call_virt(p, f, args...)      ((p)->f(args))
 
 /*
- * Arch code can implement the following three template macros, avoiding
- * reptition for the void/non-void return cases of {__,}efi_call_virt():
+ * Arch code must implement the following three routines:
  *
  *  * arch_efi_call_virt_setup()
  *
@@ -1181,9 +1179,8 @@ static inline void efi_check_for_embedded_firmwares(void) { }
  *
  *  * arch_efi_call_virt()
  *
- *    Performs the call. The last expression in the macro must be the call
- *    itself, allowing the logic to be shared by the void and non-void
- *    cases.
+ *    Performs the call. This routine takes a variable number of arguments so
+ *    it must be implemented as a variadic preprocessor macro.
  *
  *  * arch_efi_call_virt_teardown()
  *
@@ -1192,33 +1189,20 @@ static inline void efi_check_for_embedded_firmwares(void) { }
 
 #define efi_call_virt_pointer(p, f, args...)                           \
 ({                                                                     \
-       efi_status_t __s;                                               \
+       typeof((p)->f(args)) __s;                                       \
        unsigned long __flags;                                          \
                                                                        \
        arch_efi_call_virt_setup();                                     \
                                                                        \
        __flags = efi_call_virt_save_flags();                           \
        __s = arch_efi_call_virt(p, f, args);                           \
-       efi_call_virt_check_flags(__flags, __stringify(f));             \
+       efi_call_virt_check_flags(__flags, NULL);                       \
                                                                        \
        arch_efi_call_virt_teardown();                                  \
                                                                        \
        __s;                                                            \
 })
 
-#define __efi_call_virt_pointer(p, f, args...)                         \
-({                                                                     \
-       unsigned long __flags;                                          \
-                                                                       \
-       arch_efi_call_virt_setup();                                     \
-                                                                       \
-       __flags = efi_call_virt_save_flags();                           \
-       arch_efi_call_virt(p, f, args);                                 \
-       efi_call_virt_check_flags(__flags, __stringify(f));             \
-                                                                       \
-       arch_efi_call_virt_teardown();                                  \
-})
-
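A minimal, hypothetical arch implementation of those three hooks might be no more than:

    #define arch_efi_call_virt_setup()              do { } while (0)
    #define arch_efi_call_virt(p, f, args...)       ((p)->f(args))  /* the generic form above */
    #define arch_efi_call_virt_teardown()           do { } while (0)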
 #define EFI_RANDOM_SEED_SIZE           32U // BLAKE2S_HASH_SIZE
 
 struct linux_efi_random_seed {
@@ -1244,6 +1228,10 @@ extern int efi_tpm_final_log_size;
 
 extern unsigned long rci2_table_phys;
 
+efi_status_t
+efi_call_acpi_prm_handler(efi_status_t (__efiapi *handler_addr)(u64, void *),
+                         u64 param_buffer_addr, void *context);
+
 /*
  * efi_runtime_service() function identifiers.
  * "NONE" is used by efi_recover_from_page_fault() to check if the page
@@ -1263,25 +1251,26 @@ enum efi_rts_ids {
        EFI_RESET_SYSTEM,
        EFI_UPDATE_CAPSULE,
        EFI_QUERY_CAPSULE_CAPS,
+       EFI_ACPI_PRM_HANDLER,
 };
 
+union efi_rts_args;
+
 /*
  * efi_runtime_work:   Details of EFI Runtime Service work
- * @arg<1-5>:          EFI Runtime Service function arguments
+ * @args:              Pointer to union describing the arguments
  * @status:            Status of executing EFI Runtime Service
  * @efi_rts_id:                EFI Runtime Service function identifier
  * @efi_rts_comp:      Struct used for handling completions
+ * @caller:            The caller of the runtime service
  */
 struct efi_runtime_work {
-       void *arg1;
-       void *arg2;
-       void *arg3;
-       void *arg4;
-       void *arg5;
-       efi_status_t status;
-       struct work_struct work;
-       enum efi_rts_ids efi_rts_id;
-       struct completion efi_rts_comp;
+       union efi_rts_args      *args;
+       efi_status_t            status;
+       struct work_struct      work;
+       enum efi_rts_ids        efi_rts_id;
+       struct completion       efi_rts_comp;
+       const void              *caller;
 };
 
 extern struct efi_runtime_work efi_rts_work;
index efcdd16..95e868e 100644 (file)
@@ -144,7 +144,7 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
                        struct flock64 *);
 #endif
 
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
 int fcntl_getlease(struct file *filp);
 
 /* fs/locks.c */
@@ -167,8 +167,8 @@ bool vfs_inode_has_locks(struct inode *inode);
 int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
 int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
 void lease_get_mtime(struct inode *, struct timespec64 *time);
-int generic_setlease(struct file *, long, struct file_lock **, void **priv);
-int vfs_setlease(struct file *, long, struct file_lock **, void **);
+int generic_setlease(struct file *, int, struct file_lock **, void **priv);
+int vfs_setlease(struct file *, int, struct file_lock **, void **);
 int lease_modify(struct file_lock *, int, struct list_head *);
 
 struct notifier_block;
@@ -213,7 +213,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file,
        return -EACCES;
 }
 #endif
-static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
 {
        return -EINVAL;
 }
@@ -306,13 +306,13 @@ static inline void lease_get_mtime(struct inode *inode,
        return;
 }
 
-static inline int generic_setlease(struct file *filp, long arg,
+static inline int generic_setlease(struct file *filp, int arg,
                                    struct file_lock **flp, void **priv)
 {
        return -EINVAL;
 }
 
-static inline int vfs_setlease(struct file *filp, long arg,
+static inline int vfs_setlease(struct file *filp, int arg,
                               struct file_lock **lease, void **priv)
 {
        return -EINVAL;
index 6867512..dda08d9 100644 (file)
@@ -338,6 +338,20 @@ enum rw_hint {
 #define IOCB_NOIO              (1 << 20)
 /* can use bio alloc cache */
 #define IOCB_ALLOC_CACHE       (1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP   (1 << 22)
 
 /* for use in trace events */
 #define TRACE_IOCB_STRINGS \
@@ -351,7 +365,8 @@ enum rw_hint {
        { IOCB_WRITE,           "WRITE" }, \
        { IOCB_WAITQ,           "WAITQ" }, \
        { IOCB_NOIO,            "NOIO" }, \
-       { IOCB_ALLOC_CACHE,     "ALLOC_CACHE" }
+       { IOCB_ALLOC_CACHE,     "ALLOC_CACHE" }, \
+       { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
 
 struct kiocb {
        struct file             *ki_filp;
@@ -360,7 +375,23 @@ struct kiocb {
        void                    *private;
        int                     ki_flags;
        u16                     ki_ioprio; /* See linux/ioprio.h */
-       struct wait_page_queue  *ki_waitq; /* for async buffered IO */
+       union {
+               /*
+                * Only used for async buffered reads, where it denotes the
+                * page waitqueue associated with completing the read. Valid
+                * IFF IOCB_WAITQ is set.
+                */
+               struct wait_page_queue  *ki_waitq;
+               /*
+                * Can be used for O_DIRECT IO, where the completion handling
+                * is punted back to the issuer of the IO. May only be set
+                * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+                * must then check for presence of this handler when ki_complete
+                * is invoked. The data passed in to this handler must be
+                * assigned to ->private when dio_complete is assigned.
+                */
+               ssize_t (*dio_complete)(void *data);
+       };
 };
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
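A hedged sketch of the issuer's side of that contract, run from task context after the bio completion has filled in ->dio_complete (error handling elided):

    if (iocb->dio_complete) {
            ssize_t ret = iocb->dio_complete(iocb->private);
            iocb->ki_complete(iocb, ret);
    }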
@@ -642,7 +673,7 @@ struct inode {
        loff_t                  i_size;
        struct timespec64       i_atime;
        struct timespec64       i_mtime;
-       struct timespec64       i_ctime;
+       struct timespec64       __i_ctime; /* use inode_*_ctime accessors! */
        spinlock_t              i_lock; /* i_blocks, i_bytes, maybe i_size */
        unsigned short          i_bytes;
        u8                      i_blkbits;
@@ -1069,7 +1100,7 @@ extern void fasync_free(struct fasync_struct *);
 extern void kill_fasync(struct fasync_struct **, int, int);
 
 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, int who, int force);
 extern void f_delown(struct file *filp);
 extern pid_t f_getown(struct file *filp);
 extern int send_sigurg(struct fown_struct *fown);
@@ -1095,6 +1126,8 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_LAZYTIME     BIT(25)        /* Update the on-disk [acm]times lazily */
 
 /* These sb flags are internal to the kernel */
+#define SB_DEAD         BIT(21)
+#define SB_DYING        BIT(24)
 #define SB_SUBMOUNT     BIT(26)
 #define SB_FORCE        BIT(27)
 #define SB_NOSEC        BIT(28)
@@ -1147,7 +1180,8 @@ enum {
 #define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
 
 struct sb_writers {
-       int                             frozen;         /* Is sb frozen? */
+       unsigned short                  frozen;         /* Is sb frozen? */
+       unsigned short                  freeze_holders; /* Who froze fs? */
        struct percpu_rw_semaphore      rw_sem[SB_FREEZE_LEVELS];
 };
 
@@ -1474,7 +1508,79 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
               kgid_has_mapping(fs_userns, kgid);
 }
 
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_mgtime(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ *
+ * The kernel always keeps normalized struct timespec64 values in the ctime,
+ * which means that only the lower 30 bits of the tv_nsec field are used. Use
+ * the 31st bit of tv_nsec as a flag to indicate that the value
+ * has been queried since it was last updated.
+ */
+#define I_CTIME_QUERIED                (1L<<30)
+
+/**
+ * inode_get_ctime - fetch the current ctime from the inode
+ * @inode: inode from which to fetch ctime
+ *
+ * Grab the current ctime tv_nsec field from the inode, mask off the
+ * I_CTIME_QUERIED flag and return it. This is mostly intended for use by
+ * internal consumers of the ctime that aren't concerned with ensuring a
+ * fine-grained update on the next change (e.g. when preparing to store
+ * the value in the backing store for later retrieval).
+ *
+ * This is safe to call regardless of whether the underlying filesystem
+ * is using multigrain timestamps.
+ */
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+       struct timespec64 ctime;
+
+       ctime.tv_sec = inode->__i_ctime.tv_sec;
+       ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED;
+
+       return ctime;
+}
+
+/**
+ * inode_set_ctime_to_ts - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @ts: value to set in the ctime field
+ *
+ * Set the ctime in @inode to @ts
+ */
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+                                                     struct timespec64 ts)
+{
+       inode->__i_ctime = ts;
+       return ts;
+}
+
+/**
+ * inode_set_ctime - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @sec: tv_sec value to set
+ * @nsec: tv_nsec value to set
+ *
+ * Set the ctime in @inode to { @sec, @nsec }
+ */
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+                                               time64_t sec, long nsec)
+{
+       struct timespec64 ts = { .tv_sec  = sec,
+                                .tv_nsec = nsec };
+
+       return inode_set_ctime_to_ts(inode, ts);
+}
 
 /*
  * Snapshotting support.
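In filesystem code the accessors replace direct stores to the renamed field; a sketch of the common update and read patterns:

    /* update: set ctime to "now" and mirror it into mtime */
    inode->i_mtime = inode_set_ctime_current(inode);

    /* read: fetch the ctime with the I_CTIME_QUERIED flag masked off */
    struct timespec64 ctime = inode_get_ctime(inode);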
@@ -1770,6 +1876,7 @@ struct dir_context {
 
 struct iov_iter;
 struct io_uring_cmd;
+struct offset_ctx;
 
 struct file_operations {
        struct module *owner;
@@ -1780,7 +1887,6 @@ struct file_operations {
        ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
        int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
                        unsigned int flags);
-       int (*iterate) (struct file *, struct dir_context *);
        int (*iterate_shared) (struct file *, struct dir_context *);
        __poll_t (*poll) (struct file *, struct poll_table_struct *);
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -1799,7 +1905,7 @@ struct file_operations {
        ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
        ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
        void (*splice_eof)(struct file *file);
-       int (*setlease)(struct file *, long, struct file_lock **, void **);
+       int (*setlease)(struct file *, int, struct file_lock **, void **);
        long (*fallocate)(struct file *file, int mode, loff_t offset,
                          loff_t len);
        void (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -1817,6 +1923,13 @@ struct file_operations {
                                unsigned int poll_flags);
 } __randomize_layout;
 
+/* Wrap a directory iterator that needs exclusive inode access */
+int wrap_directory_iterator(struct file *, struct dir_context *,
+                           int (*) (struct file *, struct dir_context *));
+#define WRAP_DIR_ITER(x) \
+       static int shared_##x(struct file *file, struct dir_context *ctx) \
+       { return wrap_directory_iterator(file, ctx, x); }
+
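A sketch of converting a filesystem whose directory iterator still needs exclusive inode access (myfs_iterate is hypothetical):

    WRAP_DIR_ITER(myfs_iterate)     /* emits shared_myfs_iterate() */

    static const struct file_operations myfs_dir_operations = {
            .iterate_shared = shared_myfs_iterate,
    };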
 struct inode_operations {
        struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
        const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
@@ -1844,7 +1957,7 @@ struct inode_operations {
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
                      u64 len);
-       int (*update_time)(struct inode *, struct timespec64 *, int);
+       int (*update_time)(struct inode *, int);
        int (*atomic_open)(struct inode *, struct dentry *,
                           struct file *, unsigned open_flag,
                           umode_t create_mode);
@@ -1857,6 +1970,7 @@ struct inode_operations {
        int (*fileattr_set)(struct mnt_idmap *idmap,
                            struct dentry *dentry, struct fileattr *fa);
        int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+       struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
 } ____cacheline_aligned;
 
 static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -1902,6 +2016,10 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
                                        struct file *dst_file, loff_t dst_pos,
                                        loff_t len, unsigned int remap_flags);
 
+enum freeze_holder {
+       FREEZE_HOLDER_KERNEL    = (1U << 0),
+       FREEZE_HOLDER_USERSPACE = (1U << 1),
+};
 
 struct super_operations {
        struct inode *(*alloc_inode)(struct super_block *sb);
@@ -1914,9 +2032,9 @@ struct super_operations {
        void (*evict_inode) (struct inode *);
        void (*put_super) (struct super_block *);
        int (*sync_fs)(struct super_block *sb, int wait);
-       int (*freeze_super) (struct super_block *);
+       int (*freeze_super) (struct super_block *, enum freeze_holder who);
        int (*freeze_fs) (struct super_block *);
-       int (*thaw_super) (struct super_block *);
+       int (*thaw_super) (struct super_block *, enum freeze_holder who);
        int (*unfreeze_fs) (struct super_block *);
        int (*statfs) (struct dentry *, struct kstatfs *);
        int (*remount_fs) (struct super_block *, int *, char *);
@@ -2194,7 +2312,7 @@ enum file_time_flags {
 
 extern bool atime_needs_update(const struct path *, struct inode *);
 extern void touch_atime(const struct path *);
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int inode_update_time(struct inode *inode, int flags);
 
 static inline void file_accessed(struct file *file)
 {
@@ -2216,6 +2334,7 @@ struct file_system_type {
 #define FS_USERNS_MOUNT                8       /* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM        16      /* Disable fanotify permission events */
 #define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME              64      /* FS uses multigrain timestamps */
 #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move() during rename() internally. */
        int (*init_fs_context)(struct fs_context *);
        const struct fs_parameter_spec *parameters;
@@ -2239,6 +2358,17 @@ struct file_system_type {
 
 #define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
 
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+       return inode->i_sb->s_type->fs_flags & FS_MGTIME;
+}
+
 extern struct dentry *mount_bdev(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data,
        int (*fill_super)(struct super_block *, void *, int));
@@ -2290,8 +2420,8 @@ extern int unregister_filesystem(struct file_system_type *);
 extern int vfs_statfs(const struct path *, struct kstatfs *);
 extern int user_statfs(const char __user *, struct kstatfs *);
 extern int fd_statfs(int, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
+int freeze_super(struct super_block *super, enum freeze_holder who);
+int thaw_super(struct super_block *super, enum freeze_holder who);
 extern __printf(2, 3)
 int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
 extern int super_setup_bdi(struct super_block *sb);
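Freeze and thaw must now be balanced per holder; a sketch of a kernel-internal pairing:

    ret = freeze_super(sb, FREEZE_HOLDER_KERNEL);
    if (!ret) {
            /* ... operate on the frozen filesystem ... */
            ret = thaw_super(sb, FREEZE_HOLDER_KERNEL);
    }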
@@ -2300,7 +2430,8 @@ extern int current_umask(void);
 
 extern void ihold(struct inode * inode);
 extern void iput(struct inode *);
-extern int generic_update_time(struct inode *, struct timespec64 *, int);
+int inode_update_timestamps(struct inode *inode, int flags);
+int generic_update_time(struct inode *, int);
 
 /* /sys/fs */
 extern struct kobject *fs_kobj;
@@ -2539,6 +2670,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
        return (inode->i_mode ^ mode) & S_IFMT;
 }
 
+/**
+ * file_start_write - get write access to a superblock for regular file io
+ * @file: the file we want to write to
+ *
+ * This is a variant of sb_start_write() which is a no-op on non-regular files.
+ * Should be matched with a call to file_end_write().
+ */
 static inline void file_start_write(struct file *file)
 {
        if (!S_ISREG(file_inode(file)->i_mode))
@@ -2553,11 +2691,53 @@ static inline bool file_start_write_trylock(struct file *file)
        return sb_start_write_trylock(file_inode(file)->i_sb);
 }
 
+/**
+ * file_end_write - drop write access to a superblock of a regular file
+ * @file: the file we wrote to
+ *
+ * Should be matched with a call to file_start_write().
+ */
 static inline void file_end_write(struct file *file)
 {
        if (!S_ISREG(file_inode(file)->i_mode))
                return;
-       __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+       sb_end_write(file_inode(file)->i_sb);
+}
+
+/**
+ * kiocb_start_write - get write access to a superblock for async file io
+ * @iocb: the io context we want to submit the write with
+ *
+ * This is a variant of sb_start_write() for async io submission.
+ * Should be matched with a call to kiocb_end_write().
+ */
+static inline void kiocb_start_write(struct kiocb *iocb)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+
+       sb_start_write(inode->i_sb);
+       /*
+        * Fool lockdep by telling it the lock got released so that it
+        * doesn't complain about the held lock when we return to userspace.
+        */
+       __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * kiocb_end_write - drop write access to a superblock after async file io
+ * @iocb: the io context we submitted the write with
+ *
+ * Should be matched with a call to kiocb_start_write().
+ */
+static inline void kiocb_end_write(struct kiocb *iocb)
+{
+       struct inode *inode = file_inode(iocb->ki_filp);
+
+       /*
+        * Tell lockdep we inherited freeze protection from submission thread.
+        */
+       __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+       sb_end_write(inode->i_sb);
 }
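A sketch of the intended pairing on an async O_DIRECT write path (simplified; when the write goes async, the completion side is expected to call kiocb_end_write()):

    kiocb_start_write(iocb);
    ret = call_write_iter(file, iocb, &iter);
    if (ret != -EIOCBQUEUED)
            kiocb_end_write(iocb);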
 
 /*
@@ -2607,8 +2787,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
 #if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
 static inline void i_readcount_dec(struct inode *inode)
 {
-       BUG_ON(!atomic_read(&inode->i_readcount));
-       atomic_dec(&inode->i_readcount);
+       BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
 }
 static inline void i_readcount_inc(struct inode *inode)
 {
@@ -2874,7 +3053,8 @@ extern void page_put_link(void *);
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
-void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
+void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
 void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -2913,7 +3093,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
 extern struct file_system_type *get_filesystem(struct file_system_type *fs);
 extern void put_filesystem(struct file_system_type *fs);
 extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void drop_super_exclusive(struct super_block *sb);
@@ -2934,6 +3113,8 @@ extern int simple_open(struct inode *inode, struct file *file);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+                            struct inode *new_dir, struct dentry *new_dentry);
 extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
                                  struct inode *new_dir, struct dentry *new_dentry);
 extern int simple_rename(struct mnt_idmap *, struct inode *,
@@ -2950,7 +3131,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
 extern const struct address_space_operations ram_aops;
 extern int always_delete_dentry(const struct dentry *);
 extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
+extern int simple_nosetlease(struct file *, int, struct file_lock **, void **);
 extern const struct dentry_operations simple_dentry_operations;
 
 extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
@@ -2971,6 +3152,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
                const void __user *from, size_t count);
 
+struct offset_ctx {
+       struct xarray           xa;
+       u32                     next_offset;
+};
+
+void simple_offset_init(struct offset_ctx *octx);
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_rename_exchange(struct inode *old_dir,
+                                 struct dentry *old_dentry,
+                                 struct inode *new_dir,
+                                 struct dentry *new_dentry);
+void simple_offset_destroy(struct offset_ctx *octx);
+
+extern const struct file_operations simple_offset_dir_operations;
+
 extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
 extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
 
index ff6341e..96332db 100644 (file)
@@ -109,6 +109,7 @@ struct fs_context {
        bool                    need_free:1;    /* Need to call ops->free() */
        bool                    global:1;       /* Goes into &init_user_ns */
        bool                    oldapi:1;       /* Coming from mount(2) */
+       bool                    exclusive:1;    /* create new superblock, reject existing one */
 };
 
 struct fs_context_operations {
@@ -150,14 +151,13 @@ extern int get_tree_nodev(struct fs_context *fc,
 extern int get_tree_single(struct fs_context *fc,
                         int (*fill_super)(struct super_block *sb,
                                           struct fs_context *fc));
-extern int get_tree_single_reconf(struct fs_context *fc,
-                        int (*fill_super)(struct super_block *sb,
-                                          struct fs_context *fc));
 extern int get_tree_keyed(struct fs_context *fc,
                         int (*fill_super)(struct super_block *sb,
                                           struct fs_context *fc),
                         void *key);
 
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+               struct fs_context *fc);
 extern int get_tree_bdev(struct fs_context *fc,
                               int (*fill_super)(struct super_block *sb,
                                                 struct fs_context *fc));
index 54210a4..010d39d 100644 (file)
@@ -24,7 +24,7 @@ static inline void fsstack_copy_attr_times(struct inode *dest,
 {
        dest->i_atime = src->i_atime;
        dest->i_mtime = src->i_mtime;
-       dest->i_ctime = src->i_ctime;
+       inode_set_ctime_to_ts(dest, inode_get_ctime(src));
 }
 
 #endif /* _LINUX_FS_STACK_H */
index 8e59bd9..aad9cf8 100644 (file)
@@ -41,6 +41,15 @@ struct ftrace_ops;
 struct ftrace_regs;
 struct dyn_ftrace;
 
+char *arch_ftrace_match_adjust(char *str, const char *search);
+
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
+struct fgraph_ret_regs;
+unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs);
+#else
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer);
+#endif
+
 #ifdef CONFIG_FUNCTION_TRACER
 /*
  * If the arch's mcount caller does not support all of ftrace's
@@ -675,7 +684,6 @@ void __init
 ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
 
 /* defined in arch */
-extern int ftrace_ip_converted(unsigned long ip);
 extern int ftrace_dyn_arch_init(void);
 extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
@@ -850,9 +858,6 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a
 }
 #endif
 
-/* May be defined in arch */
-extern int ftrace_arch_read_dyn_info(char *buf, int size);
-
 extern int skip_trace(unsigned long ip);
 extern void ftrace_module_init(struct module *mod);
 extern void ftrace_module_enable(struct module *mod);
index 2028438..e718dbe 100644 (file)
@@ -25,9 +25,6 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
 #endif
 
 vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
-                                  unsigned long addr, pmd_t *pmd,
-                                  unsigned int flags);
 bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                           pmd_t *pmd, unsigned long addr, unsigned long next);
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
index bfbc37c..3ac3974 100644 (file)
@@ -1239,9 +1239,6 @@ extern int vmbus_recvpacket_raw(struct vmbus_channel *channel,
                                     u32 *buffer_actual_len,
                                     u64 *requestid);
 
-
-extern void vmbus_ontimer(unsigned long data);
-
 /* Base driver object */
 struct hv_driver {
        const char *name;
index e6936cb..33f21bd 100644 (file)
@@ -100,10 +100,16 @@ struct rapl_package;
 
 #define RAPL_DOMAIN_NAME_LENGTH 16
 
+union rapl_reg {
+       void __iomem *mmio;
+       u32 msr;
+       u64 val;
+};
+
 struct rapl_domain {
        char name[RAPL_DOMAIN_NAME_LENGTH];
        enum rapl_domain_type id;
-       u64 regs[RAPL_DOMAIN_REG_MAX];
+       union rapl_reg regs[RAPL_DOMAIN_REG_MAX];
        struct powercap_zone power_zone;
        struct rapl_domain_data rdd;
        struct rapl_power_limit rpl[NR_POWER_LIMITS];
@@ -116,7 +122,7 @@ struct rapl_domain {
 };
 
 struct reg_action {
-       u64 reg;
+       union rapl_reg reg;
        u64 mask;
        u64 value;
        int err;
@@ -143,8 +149,8 @@ struct rapl_if_priv {
        enum rapl_if_type type;
        struct powercap_control_type *control_type;
        enum cpuhp_state pcap_rapl_online;
-       u64 reg_unit;
-       u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
+       union rapl_reg reg_unit;
+       union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
        int limits[RAPL_DOMAIN_MAX];
        int (*read_raw)(int id, struct reg_action *ra);
        int (*write_raw)(int id, struct reg_action *ra);
index e2b836c..fdc6e64 100644 (file)
@@ -261,9 +261,10 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
 void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
 void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
                const struct iomap_ops *ops);
 int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
index ea2bcda..9a4c204 100644 (file)
@@ -44,9 +44,7 @@ struct ism_dev {
        u64 local_gid;
        int ieq_idx;
 
-       atomic_t free_clients_cnt;
-       atomic_t add_dev_cnt;
-       wait_queue_head_t waitq;
+       struct ism_client *subs[MAX_CLIENTS];
 };
 
 struct ism_event {
@@ -68,9 +66,6 @@ struct ism_client {
         */
        void (*handle_irq)(struct ism_dev *dev, unsigned int bit, u16 dmbemask);
        /* Private area - don't touch! */
-       struct work_struct remove_work;
-       struct work_struct add_work;
-       struct ism_dev *tgt_ism;
        u8 id;
 };
 
index d860499..44c298a 100644 (file)
@@ -614,12 +614,6 @@ struct transaction_s
        struct journal_head     *t_checkpoint_list;
 
        /*
-        * Doubly-linked circular list of all buffers submitted for IO while
-        * checkpointing. [j_list_lock]
-        */
-       struct journal_head     *t_checkpoint_io_list;
-
-       /*
         * Doubly-linked circular list of metadata buffers being
         * shadowed by log IO.  The IO buffers on the iobuf list and
         * the shadow buffers on this list match each other one for
@@ -1449,6 +1443,7 @@ extern void jbd2_journal_commit_transaction(journal_t *);
 void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
 unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);
 void jbd2_journal_destroy_checkpoint(journal_t *journal);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
index f10344d..164b4d0 100644 (file)
@@ -38,11 +38,92 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
        WRITE_ONCE(list->prev, list);
 }
 
+#ifdef CONFIG_LIST_HARDENED
+
 #ifdef CONFIG_DEBUG_LIST
-extern bool __list_add_valid(struct list_head *new,
-                             struct list_head *prev,
-                             struct list_head *next);
-extern bool __list_del_entry_valid(struct list_head *entry);
+# define __list_valid_slowpath
+#else
+# define __list_valid_slowpath __cold __preserve_most
+#endif
+
+/*
+ * Performs the full set of list corruption checks before __list_add().
+ * On list corruption, reports a warning and returns false.
+ */
+extern bool __list_valid_slowpath __list_add_valid_or_report(struct list_head *new,
+                                                            struct list_head *prev,
+                                                            struct list_head *next);
+
+/*
+ * Performs list corruption checks before __list_add(). Returns false if a
+ * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_add_valid_or_report().
+ */
+static __always_inline bool __list_add_valid(struct list_head *new,
+                                            struct list_head *prev,
+                                            struct list_head *next)
+{
+       bool ret = true;
+
+       if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+               /*
+                * With the hardening version, elide checking if next and prev
+                * are NULL, since the immediate dereference of them below would
+                * result in a fault if NULL.
+                *
+                * With the reduced set of checks, we can afford to inline the
+                * checks, which also gives the compiler a chance to elide some
+                * of them completely if they can be proven at compile-time. If
+                * one of the pre-conditions does not hold, the slow-path will
+                * one of the pre-conditions does not hold, the slow-path
+                * report will show which pre-condition failed.
+               if (likely(next->prev == prev && prev->next == next && new != prev && new != next))
+                       return true;
+               ret = false;
+       }
+
+       ret &= __list_add_valid_or_report(new, prev, next);
+       return ret;
+}
+
+/*
+ * Performs the full set of list corruption checks before __list_del_entry().
+ * On list corruption, reports a warning and returns false.
+ */
+extern bool __list_valid_slowpath __list_del_entry_valid_or_report(struct list_head *entry);
+
+/*
+ * Performs list corruption checks before __list_del_entry(). Returns false if a
+ * corruption is detected, true otherwise.
+ *
+ * With CONFIG_LIST_HARDENED only, performs minimal list integrity checking
+ * inline to catch non-faulting corruptions, and only if a corruption is
+ * detected calls the reporting function __list_del_entry_valid_or_report().
+ */
+static __always_inline bool __list_del_entry_valid(struct list_head *entry)
+{
+       bool ret = true;
+
+       if (!IS_ENABLED(CONFIG_DEBUG_LIST)) {
+               struct list_head *prev = entry->prev;
+               struct list_head *next = entry->next;
+
+               /*
+                * With the hardening version, elide checking if next and prev
+                * are NULL, LIST_POISON1 or LIST_POISON2, since the immediate
+                * dereference of them below would result in a fault.
+                */
+               if (likely(prev->next == entry && next->prev == entry))
+                       return true;
+               ret = false;
+       }
+
+       ret &= __list_del_entry_valid_or_report(entry);
+       return ret;
+}
 #else
 static inline bool __list_add_valid(struct list_head *new,
                                struct list_head *prev,
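
For context on the list.h rework above: with CONFIG_LIST_HARDENED the cheap
pointer-consistency checks stay inline on the fast path, and only a detected
corruption calls the out-of-line (__cold, __preserve_most) reporting
function; with CONFIG_DEBUG_LIST the full checks always run out of line. A
contrived sketch of a non-faulting corruption the inline fast path catches
(all names here are illustrative):

    #include <linux/list.h>

    static LIST_HEAD(head);
    static struct list_head b, elem;

    static void demo(void)
    {
            list_add(&b, &head);
            b.prev = &b;             /* simulate a non-faulting corruption */
            list_add(&elem, &head);  /* inline check sees next->prev != prev,
                                        so __list_add_valid_or_report() warns
                                        and the insertion is rejected */
    }
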
index 7308a1a..af79698 100644 (file)
@@ -54,6 +54,7 @@ LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *f
 LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
 LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
 LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
+LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference)
 LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
         struct fs_context *src_sc)
 LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
index 2dd73e4..34f9dba 100644 (file)
@@ -641,8 +641,14 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
  */
 static inline bool vma_start_read(struct vm_area_struct *vma)
 {
-       /* Check before locking. A race might cause false locked result. */
-       if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
+       /*
+        * Check before locking. A race might cause false locked result.
+        * We can use READ_ONCE() for the mm_lock_seq here, and don't need
+        * ACQUIRE semantics, because this is just a lockless check whose result
+        * we don't rely on for anything - the mm_lock_seq read against which we
+        * need ordering is below.
+        */
+       if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
                return false;
 
        if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -653,8 +659,13 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
         * False unlocked result is impossible because we modify and check
         * vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
         * modification invalidates all existing locks.
+        *
+        * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
+        * racing with vma_end_write_all(), we only start reading from the VMA
+        * after it has been unlocked.
+        * This pairs with RELEASE semantics in vma_end_write_all().
         */
-       if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
+       if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
                up_read(&vma->vm_lock->lock);
                return false;
        }
@@ -676,7 +687,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
         * current task is holding mmap_write_lock, both vma->vm_lock_seq and
         * mm->mm_lock_seq can't be concurrently modified.
         */
-       *mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
+       *mm_lock_seq = vma->vm_mm->mm_lock_seq;
        return (vma->vm_lock_seq == *mm_lock_seq);
 }
 
@@ -688,7 +699,13 @@ static inline void vma_start_write(struct vm_area_struct *vma)
                return;
 
        down_write(&vma->vm_lock->lock);
-       vma->vm_lock_seq = mm_lock_seq;
+       /*
+        * We should use WRITE_ONCE() here because we can have concurrent reads
+        * from the early lockless pessimistic check in vma_start_read().
+        * We don't really care about the correctness of that early check, but
+        * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
+        */
+       WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
        up_write(&vma->vm_lock->lock);
 }
 
@@ -702,7 +719,7 @@ static inline bool vma_try_start_write(struct vm_area_struct *vma)
        if (!down_write_trylock(&vma->vm_lock->lock))
                return false;
 
-       vma->vm_lock_seq = mm_lock_seq;
+       WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
        up_write(&vma->vm_lock->lock);
        return true;
 }
@@ -3404,15 +3421,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
  * Indicates whether GUP can follow a PROT_NONE mapped page, or whether
  * a (NUMA hinting) fault is required.
  */
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+                                          unsigned int flags)
 {
        /*
-        * FOLL_FORCE has to be able to make progress even if the VMA is
-        * inaccessible. Further, FOLL_FORCE access usually does not represent
-        * application behaviour and we should avoid triggering NUMA hinting
-        * faults.
+        * If callers don't want to honor NUMA hinting faults, no need to
+        * determine if we would actually have to trigger a NUMA hinting fault.
+        */
+       if (!(flags & FOLL_HONOR_NUMA_FAULT))
+               return true;
+
+       /*
+        * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+        *
+        * Requiring a fault here even for inaccessible VMAs would mean that
+        * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+        * refuses to process NUMA hinting faults in inaccessible VMAs.
         */
-       return flags & FOLL_FORCE;
+       return !vma_is_accessible(vma);
 }
 
 typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
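
The gup_can_follow_protnone() change above inverts the old polarity: callers
now opt in to NUMA hinting with FOLL_HONOR_NUMA_FAULT, and inaccessible
(PROT_NONE) VMAs, where hinting faults cannot be processed anyway, are
always followable. A sketch of the call-site shape this serves, roughly as
in the mm/gup.c page-table walkers:

    /* Bounce the caller to a NUMA hinting fault only when the PTE is
     * protnone *and* the caller asked to honor such faults. */
    if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
            goto no_page;   /* caller resolves it via handle_mm_fault() */
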
index de10fc7..7d30dc4 100644 (file)
@@ -514,6 +514,20 @@ struct vm_area_struct {
        };
 
 #ifdef CONFIG_PER_VMA_LOCK
+       /*
+        * Can only be written (using WRITE_ONCE()) while holding both:
+        *  - mmap_lock (in write mode)
+        *  - vm_lock->lock (in write mode)
+        * Can be read reliably while holding one of:
+        *  - mmap_lock (in read or write mode)
+        *  - vm_lock->lock (in read or write mode)
+        * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
+        * while holding nothing (except RCU to keep the VMA struct allocated).
+        *
+        * This sequence counter is explicitly allowed to overflow; sequence
+        * counter reuse can only lead to occasional unnecessary use of the
+        * slowpath.
+        */
        int vm_lock_seq;
        struct vma_lock *vm_lock;
 
@@ -679,6 +693,20 @@ struct mm_struct {
                                          * by mmlist_lock
                                          */
 #ifdef CONFIG_PER_VMA_LOCK
+               /*
+                * This field has lock-like semantics, meaning it is sometimes
+                * accessed with ACQUIRE/RELEASE semantics.
+                * Roughly speaking, incrementing the sequence number is
+                * equivalent to releasing locks on VMAs; reading the sequence
+                * number can be part of taking a read lock on a VMA.
+                *
+                * Can be modified under write mmap_lock using RELEASE
+                * semantics.
+                * Can be read with no other protection when holding write
+                * mmap_lock.
+                * Can be read with ACQUIRE semantics if not holding write
+                * mmap_lock.
+                */
                int mm_lock_seq;
 #endif
 
@@ -1258,6 +1286,15 @@ enum {
        FOLL_PCI_P2PDMA = 1 << 10,
        /* allow interrupts from generic signals */
        FOLL_INTERRUPTIBLE = 1 << 11,
+       /*
+        * Always honor (trigger) NUMA hinting faults.
+        *
+        * FOLL_WRITE implicitly honors NUMA hinting faults because a
+        * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+        * apply). get_user_pages_fast_only() always implicitly honors NUMA
+        * hinting faults.
+        */
+       FOLL_HONOR_NUMA_FAULT = 1 << 12,
 
        /* See also internal only FOLL flags in mm/internal.h */
 };
index aab8f1b..e05e167 100644 (file)
@@ -76,8 +76,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
 static inline void vma_end_write_all(struct mm_struct *mm)
 {
        mmap_assert_write_locked(mm);
-       /* No races during update due to exclusive mmap_lock being held */
-       WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1);
+       /*
+        * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
+        * mmap_lock being held.
+        * We need RELEASE semantics here to ensure that preceding stores into
+        * the VMA take effect before we unlock it with this store.
+        * Pairs with ACQUIRE semantics in vma_start_read().
+        */
+       smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
 }
 #else
 static inline void vma_end_write_all(struct mm_struct *mm) {}
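
Taken together, the mm.h and mmap_lock.h hunks above pin down one ordering
contract. A schematic of the pairing (comments, not literal code):

    /*
     * writer (vma_end_write_all)          reader (vma_start_read)
     * --------------------------          --------------------------------
     * ...stores into the VMA...           down_read_trylock(&vma->vm_lock)
     * smp_store_release(                  seq = smp_load_acquire(
     *     &mm->mm_lock_seq, seq + 1);               &mm->mm_lock_seq);
     *                                     if (vma->vm_lock_seq == seq)
     *                                             back off: still locked
     *
     * A reader that observes the incremented sequence number is guaranteed
     * to also observe every VMA store that preceded the RELEASE.
     */
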
index 499e486..e0bf836 100644 (file)
@@ -47,7 +47,7 @@ enum utf16_endian {
 /* nls_base.c */
 extern int __register_nls(struct nls_table *, struct module *);
 extern int unregister_nls(struct nls_table *);
-extern struct nls_table *load_nls(char *);
+extern struct nls_table *load_nls(const char *charset);
 extern void unload_nls(struct nls_table *);
 extern struct nls_table *load_nls_default(void);
 #define register_nls(nls) __register_nls((nls), THIS_MODULE)
index 8654470..45702bd 100644 (file)
@@ -73,9 +73,7 @@ struct raw_notifier_head {
 
 struct srcu_notifier_head {
        struct mutex mutex;
-#ifdef CONFIG_TREE_SRCU
        struct srcu_usage srcuu;
-#endif
        struct srcu_struct srcu;
        struct notifier_block __rcu *head;
 };
@@ -106,7 +104,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 #define RAW_NOTIFIER_INIT(name)        {                               \
                .head = NULL }
 
-#ifdef CONFIG_TREE_SRCU
 #define SRCU_NOTIFIER_INIT(name, pcpu)                         \
        {                                                       \
                .mutex = __MUTEX_INITIALIZER(name.mutex),       \
@@ -114,14 +111,6 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
                .srcuu = __SRCU_USAGE_INIT(name.srcuu),         \
                .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
        }
-#else
-#define SRCU_NOTIFIER_INIT(name, pcpu)                         \
-       {                                                       \
-               .mutex = __MUTEX_INITIALIZER(name.mutex),       \
-               .head = NULL,                                   \
-               .srcu = __SRCU_STRUCT_INIT(name.srcu, name.srcuu, pcpu), \
-       }
-#endif
 
 #define ATOMIC_NOTIFIER_HEAD(name)                             \
        struct atomic_notifier_head name =                      \
index fee881c..771cb02 100644 (file)
@@ -29,7 +29,7 @@ struct fs_struct;
  * nsproxy is copied.
  */
 struct nsproxy {
-       atomic_t count;
+       refcount_t count;
        struct uts_namespace *uts_ns;
        struct ipc_namespace *ipc_ns;
        struct mnt_namespace *mnt_ns;
@@ -102,14 +102,13 @@ int __init nsproxy_cache_init(void);
 
 static inline void put_nsproxy(struct nsproxy *ns)
 {
-       if (atomic_dec_and_test(&ns->count)) {
+       if (refcount_dec_and_test(&ns->count))
                free_nsproxy(ns);
-       }
 }
 
 static inline void get_nsproxy(struct nsproxy *ns)
 {
-       atomic_inc(&ns->count);
+       refcount_inc(&ns->count);
 }
 
 #endif
index 182b6d6..26dd3f8 100644 (file)
@@ -473,7 +473,7 @@ struct nvme_id_ns_nvm {
 };
 
 enum {
-       NVME_ID_NS_NVM_STS_MASK         = 0x3f,
+       NVME_ID_NS_NVM_STS_MASK         = 0x7f,
        NVME_ID_NS_NVM_GUARD_SHIFT      = 7,
        NVME_ID_NS_NVM_GUARD_MASK       = 0x3,
 };
index 716953e..d87840a 100644 (file)
@@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page)
        return folio_detach_private(page_folio(page));
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP is disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages).
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER    HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER    8
+#endif
+
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 #else
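
The arithmetic behind the MAX_PAGECACHE_ORDER comment above, assuming 4 KiB
base pages: order 8 is 2^8 pages, i.e. 1 MiB, while HPAGE_PMD_ORDER is 9
(2 MiB) on x86-64. In code terms:

    /* Largest folio the page cache will allocate, in bytes: */
    size_t max_bytes = PAGE_SIZE << MAX_PAGECACHE_ORDER;
    /* 4096 << 8 = 1 MiB without THP; 4096 << 9 = 2 MiB with THP on x86-64 */
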
@@ -501,22 +514,69 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
 pgoff_t page_cache_prev_miss(struct address_space *mapping,
                             pgoff_t index, unsigned long max_scan);
 
-#define FGP_ACCESSED           0x00000001
-#define FGP_LOCK               0x00000002
-#define FGP_CREAT              0x00000004
-#define FGP_WRITE              0x00000008
-#define FGP_NOFS               0x00000010
-#define FGP_NOWAIT             0x00000020
-#define FGP_FOR_MMAP           0x00000040
-#define FGP_STABLE             0x00000080
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio().  For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ *   added to the page cache and the VM's LRU list.  The folio is
+ *   returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ *   folio is already in cache.  If the folio was allocated, unlock it
+ *   before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ *   implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED           ((__force fgf_t)0x00000001)
+#define FGP_LOCK               ((__force fgf_t)0x00000002)
+#define FGP_CREAT              ((__force fgf_t)0x00000004)
+#define FGP_WRITE              ((__force fgf_t)0x00000008)
+#define FGP_NOFS               ((__force fgf_t)0x00000010)
+#define FGP_NOWAIT             ((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP           ((__force fgf_t)0x00000040)
+#define FGP_STABLE             ((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf)     (((__force unsigned)fgf) >> 26) /* top 6 bits */
 
 #define FGP_WRITEBEGIN         (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created.  If there is already a folio at
+ * the index, it will be returned, no matter what its size.  If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+       unsigned int shift = ilog2(size);
+
+       if (shift <= PAGE_SHIFT)
+               return 0;
+       return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-               int fgp_flags, gfp_t gfp);
+               fgf_t fgp_flags, gfp_t gfp);
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-               int fgp_flags, gfp_t gfp);
+               fgf_t fgp_flags, gfp_t gfp);
 
 /**
  * filemap_get_folio - Find and get a folio.
@@ -590,7 +650,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
 }
 
 static inline struct page *find_get_page_flags(struct address_space *mapping,
-                                       pgoff_t offset, int fgp_flags)
+                                       pgoff_t offset, fgf_t fgp_flags)
 {
        return pagecache_get_page(mapping, offset, fgp_flags, 0);
 }
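
The fgf_t/fgf_set_order() machinery above lets a filesystem hint the
preferred folio size when asking the page cache for a folio. A sketch of a
write_begin-style caller, with 'mapping', 'pos' and 'len' as placeholders
(iomap uses essentially this pattern):

    struct folio *folio;
    fgf_t fgp = FGP_WRITEBEGIN | fgf_set_order(len);

    folio = __filemap_get_folio(mapping, pos >> PAGE_SHIFT, fgp,
                                mapping_gfp_mask(mapping));
    if (IS_ERR(folio))
            return PTR_ERR(folio);
    /* The folio may span several pages, but never more than
     * MAX_PAGECACHE_ORDER allows, and possibly fewer than requested. */
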
index 27a6df4..27cd1e5 100644 (file)
@@ -6,6 +6,16 @@
 
 struct mm_walk;
 
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+       /* mmap_lock should be locked for read to stabilize the vma tree */
+       PGWALK_RDLOCK = 0,
+       /* vma will be write-locked during the walk */
+       PGWALK_WRLOCK = 1,
+       /* vma is expected to be already write-locked during the walk */
+       PGWALK_WRLOCK_VERIFY = 2,
+};
+
 /**
  * struct mm_walk_ops - callbacks for walk_page_range
  * @pgd_entry:         if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
        int (*pre_vma)(unsigned long start, unsigned long end,
                       struct mm_walk *walk);
        void (*post_vma)(struct mm_walk *walk);
+       enum page_walk_lock walk_lock;
 };
 
 /*
index 2dc75df..8f9a459 100644 (file)
 #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F3 0x14e3
 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F3 0x14f3
 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F3 0x12fb
+#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3 0x12c3
+#define PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3 0x16fb
 #define PCI_DEVICE_ID_AMD_MI200_DF_F3  0x14d3
 #define PCI_DEVICE_ID_AMD_CNB17H_F3    0x1703
 #define PCI_DEVICE_ID_AMD_LANCE                0x2000
index a0801f6..143fbc1 100644 (file)
@@ -187,5 +187,6 @@ void armpmu_free_irq(int irq, int cpu);
 #endif /* CONFIG_ARM_PMU */
 
 #define ARMV8_SPE_PDEV_NAME "arm,spe-v1"
+#define ARMV8_TRBE_PDEV_NAME "arm,trbe"
 
 #endif /* __ARM_PMU_H__ */
index 2166a69..05253af 100644 (file)
@@ -288,10 +288,9 @@ struct perf_event_pmu_context;
 #define PERF_PMU_CAP_EXTENDED_REGS             0x0008
 #define PERF_PMU_CAP_EXCLUSIVE                 0x0010
 #define PERF_PMU_CAP_ITRACE                    0x0020
-#define PERF_PMU_CAP_HETEROGENEOUS_CPUS                0x0040
-#define PERF_PMU_CAP_NO_EXCLUDE                        0x0080
-#define PERF_PMU_CAP_AUX_OUTPUT                        0x0100
-#define PERF_PMU_CAP_EXTENDED_HW_TYPE          0x0200
+#define PERF_PMU_CAP_NO_EXCLUDE                        0x0040
+#define PERF_PMU_CAP_AUX_OUTPUT                        0x0080
+#define PERF_PMU_CAP_EXTENDED_HW_TYPE          0x0100
 
 struct perf_output_handle;
 
@@ -1194,7 +1193,8 @@ struct perf_sample_data {
                    PERF_MEM_S(LVL, NA)   |\
                    PERF_MEM_S(SNOOP, NA) |\
                    PERF_MEM_S(LOCK, NA)  |\
-                   PERF_MEM_S(TLB, NA))
+                   PERF_MEM_S(TLB, NA)   |\
+                   PERF_MEM_S(LVLNUM, NA))
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
                                         u64 addr, u64 period)
@@ -1316,15 +1316,31 @@ extern int perf_event_output(struct perf_event *event,
                             struct pt_regs *regs);
 
 static inline bool
-is_default_overflow_handler(struct perf_event *event)
+__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
 {
-       if (likely(event->overflow_handler == perf_event_output_forward))
+       if (likely(overflow_handler == perf_event_output_forward))
                return true;
-       if (unlikely(event->overflow_handler == perf_event_output_backward))
+       if (unlikely(overflow_handler == perf_event_output_backward))
                return true;
        return false;
 }
 
+#define is_default_overflow_handler(event) \
+       __is_default_overflow_handler((event)->overflow_handler)
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline bool uses_default_overflow_handler(struct perf_event *event)
+{
+       if (likely(is_default_overflow_handler(event)))
+               return true;
+
+       return __is_default_overflow_handler(event->orig_overflow_handler);
+}
+#else
+#define uses_default_overflow_handler(event) \
+       is_default_overflow_handler(event)
+#endif
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
                           struct perf_sample_data *data,
@@ -1860,10 +1876,6 @@ extern void arch_perf_update_userpage(struct perf_event *event,
                                      struct perf_event_mmap_page *userpg,
                                      u64 now);
 
-#ifdef CONFIG_MMU
-extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
-#endif
-
 /*
  * Snapshot branch stack on software events.
  *
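
On uses_default_overflow_handler() above: with CONFIG_BPF_SYSCALL a BPF
program may replace event->overflow_handler and stash the original in
event->orig_overflow_handler, so "is the default handler in effect?" has to
look through that indirection. A sketch of the intended kind of call site
(cf. the ARM hw_breakpoint code; 'bp' and 'addr' are caller state):

    /* Mirror the single-step setup only when the event ultimately uses the
     * default overflow handler, even if BPF has wrapped it. */
    if (uses_default_overflow_handler(bp))
            enable_single_step(bp, addr);
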
index 02e0086..608a9eb 100644 (file)
@@ -269,10 +269,10 @@ bool pipe_is_unprivileged_user(void);
 
 /* for F_SETPIPE_SZ and F_GETPIPE_SZ */
 int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+long pipe_fcntl(struct file *, unsigned int, unsigned int arg);
 struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
 
 int create_pipe_files(struct file **, int);
-unsigned int round_pipe_size(unsigned long size);
+unsigned int round_pipe_size(unsigned int size);
 
 #endif
index dc1fb58..91f87d7 100644 (file)
@@ -103,7 +103,7 @@ int dev_pm_opp_get_supplies(struct dev_pm_opp *opp, struct dev_pm_opp_supply *su
 
 unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp);
 
-unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp);
+unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index);
 
 unsigned int dev_pm_opp_get_level(struct dev_pm_opp *opp);
 
@@ -121,17 +121,29 @@ unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev);
 struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
                                              unsigned long freq,
                                              bool available);
+
+struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+                                  u32 index, bool available);
+
 struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
                                              unsigned long *freq);
 
+struct dev_pm_opp *dev_pm_opp_find_freq_floor_indexed(struct device *dev,
+                                                     unsigned long *freq, u32 index);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
+                                            unsigned long *freq);
+
+struct dev_pm_opp *dev_pm_opp_find_freq_ceil_indexed(struct device *dev,
+                                                    unsigned long *freq, u32 index);
+
 struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
                                               unsigned int level);
+
 struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
                                              unsigned int *level);
 
-struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
-                                            unsigned long *freq);
-
 struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
                                           unsigned int *bw, int index);
 
@@ -200,7 +212,7 @@ static inline unsigned long dev_pm_opp_get_power(struct dev_pm_opp *opp)
        return 0;
 }
 
-static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+static inline unsigned long dev_pm_opp_get_freq_indexed(struct dev_pm_opp *opp, u32 index)
 {
        return 0;
 }
@@ -247,26 +259,27 @@ static inline unsigned long dev_pm_opp_get_suspend_opp_freq(struct device *dev)
        return 0;
 }
 
-static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
-                                       unsigned int level)
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
+                                       unsigned long freq, bool available)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
-                                       unsigned int *level)
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_exact_indexed(struct device *dev, unsigned long freq,
+                                  u32 index, bool available)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_exact(struct device *dev,
-                                       unsigned long freq, bool available)
+static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
+                                       unsigned long *freq)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
 
-static inline struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
-                                       unsigned long *freq)
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_floor_indexed(struct device *dev, unsigned long *freq, u32 index)
 {
        return ERR_PTR(-EOPNOTSUPP);
 }
@@ -277,6 +290,24 @@ static inline struct dev_pm_opp *dev_pm_opp_find_freq_ceil(struct device *dev,
        return ERR_PTR(-EOPNOTSUPP);
 }
 
+static inline struct dev_pm_opp *
+dev_pm_opp_find_freq_ceil_indexed(struct device *dev, unsigned long *freq, u32 index)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_level_exact(struct device *dev,
+                                       unsigned int level)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline struct dev_pm_opp *dev_pm_opp_find_level_ceil(struct device *dev,
+                                       unsigned int *level)
+{
+       return ERR_PTR(-EOPNOTSUPP);
+}
+
 static inline struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev,
                                        unsigned int *bw, int index)
 {
@@ -631,4 +662,9 @@ static inline void dev_pm_opp_put_prop_name(int token)
        dev_pm_opp_clear_config(token);
 }
 
+static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp)
+{
+       return dev_pm_opp_get_freq_indexed(opp, 0);
+}
+
 #endif         /* __LINUX_OPP_H__ */
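
The *_indexed() OPP helpers above serve devices with multiple clocks, and
the classic single-clock dev_pm_opp_get_freq() becomes the index-0 wrapper
at the end of the hunk. A usage sketch for a device's second clock, where
'target_hz' is a placeholder:

    unsigned long freq = target_hz;
    struct dev_pm_opp *opp;

    opp = dev_pm_opp_find_freq_ceil_indexed(dev, &freq, 1);
    if (IS_ERR(opp))
            return PTR_ERR(opp);
    freq = dev_pm_opp_get_freq_indexed(opp, 1);
    dev_pm_opp_put(opp);
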
index 9a8151a..7c9b354 100644 (file)
@@ -85,8 +85,6 @@ extern void pm_runtime_irq_safe(struct device *dev);
 extern void __pm_runtime_use_autosuspend(struct device *dev, bool use);
 extern void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
 extern u64 pm_runtime_autosuspend_expiration(struct device *dev);
-extern void pm_runtime_update_max_time_suspended(struct device *dev,
-                                                s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 extern void pm_runtime_get_suppliers(struct device *dev);
 extern void pm_runtime_put_suppliers(struct device *dev);
index dd42d16..d9642c6 100644 (file)
@@ -10,8 +10,6 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq);
 extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
 extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
 extern void dev_pm_clear_wake_irq(struct device *dev);
-extern void dev_pm_enable_wake_irq(struct device *dev);
-extern void dev_pm_disable_wake_irq(struct device *dev);
 
 #else  /* !CONFIG_PM */
 
@@ -34,13 +32,5 @@ static inline void dev_pm_clear_wake_irq(struct device *dev)
 {
 }
 
-static inline void dev_pm_enable_wake_irq(struct device *dev)
-{
-}
-
-static inline void dev_pm_disable_wake_irq(struct device *dev)
-{
-}
-
 #endif /* CONFIG_PM */
 #endif /* _LINUX_PM_WAKEIRQ_H */
index 77f4849..6eb9ada 100644 (file)
@@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
 
 #endif /* !CONFIG_PM_SLEEP */
 
+static inline bool device_awake_path(struct device *dev)
+{
+       return device_wakeup_path(dev);
+}
+
+static inline void device_set_awake_path(struct device *dev)
+{
+       device_set_wakeup_path(dev);
+}
+
 static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 {
        return pm_wakeup_ws_event(ws, msec, false);
index b83a3f9..b068e2e 100644 (file)
@@ -25,11 +25,10 @@ struct page;
        prefetch() should be defined by the architecture; if not, the
        #define below provides a no-op define.
        
-       There are 3 prefetch() macros:
+       There are 2 prefetch() macros:
        
        prefetch(x)     - prefetches the cacheline at "x" for read
        prefetchw(x)    - prefetches the cacheline at "x" for write
-       spin_lock_prefetch(x) - prefetches the spinlock *x for taking
        
        There is also PREFETCH_STRIDE which is the architecture-preferred
        "lookahead" size for prefetching streamed operations.
@@ -44,10 +43,6 @@ struct page;
 #define prefetchw(x) __builtin_prefetch(x,1)
 #endif
 
-#ifndef ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) prefetchw(x)
-#endif
-
 #ifndef PREFETCH_STRIDE
 #define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
 #endif
index ab26200..e074587 100644 (file)
@@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags);
 void psi_memstall_leave(unsigned long *flags);
 
 int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
-struct psi_trigger *psi_trigger_create(struct psi_group *group,
-                       char *buf, enum psi_res res, struct file *file);
+struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
+                                      enum psi_res res, struct file *file,
+                                      struct kernfs_open_file *of);
 void psi_trigger_destroy(struct psi_trigger *t);
 
 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
index 040c089..f1fd3a8 100644 (file)
@@ -137,6 +137,9 @@ struct psi_trigger {
        /* Wait queue for polling */
        wait_queue_head_t event_wait;
 
+       /* Kernfs file for cgroup triggers */
+       struct kernfs_open_file *of;
+
        /* Pending event flag */
        int event;
 
index 6a9b177..e50416b 100644 (file)
@@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
        
 struct raid_template *raid_class_attach(struct raid_function_template *);
 void raid_class_release(struct raid_template *);
-
-int __must_check raid_component_add(struct raid_template *, struct device *,
-                                   struct device *);
-
index 7ee7ed5..6dbc5a1 100644 (file)
@@ -60,6 +60,32 @@ rb_insert_augmented_cached(struct rb_node *node,
        rb_insert_augmented(node, &root->rb_root, augment);
 }
 
+static __always_inline struct rb_node *
+rb_add_augmented_cached(struct rb_node *node, struct rb_root_cached *tree,
+                       bool (*less)(struct rb_node *, const struct rb_node *),
+                       const struct rb_augment_callbacks *augment)
+{
+       struct rb_node **link = &tree->rb_root.rb_node;
+       struct rb_node *parent = NULL;
+       bool leftmost = true;
+
+       while (*link) {
+               parent = *link;
+               if (less(node, parent)) {
+                       link = &parent->rb_left;
+               } else {
+                       link = &parent->rb_right;
+                       leftmost = false;
+               }
+       }
+
+       rb_link_node(node, parent, link);
+       augment->propagate(parent, NULL); /* suboptimal */
+       rb_insert_augmented_cached(node, tree, leftmost, augment);
+
+       return leftmost ? node : NULL;
+}
+
 /*
  * Template for declaring augmented rbtree callbacks (generic case)
  *
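
rb_add_augmented_cached() above is the augmented counterpart of
rb_add_cached(): the caller supplies a less() comparator plus augment
callbacks and gets the inserted node back if it became the new leftmost
element (NULL otherwise). A sketch with a hypothetical keyed item, where
'item_augment_cbs' stands for callbacks generated by
RB_DECLARE_CALLBACKS_MAX():

    struct item {
            struct rb_node node;
            u64 key;
    };

    static inline bool item_less(struct rb_node *a, const struct rb_node *b)
    {
            return rb_entry(a, struct item, node)->key <
                   rb_entry(b, struct item, node)->key;
    }

    /* Insert 'it' and learn whether it became the leftmost element: */
    struct rb_node *leftmost = rb_add_augmented_cached(&it->node, &tree,
                                                       item_less,
                                                       &item_augment_cbs);
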
index ba4c00d..89186c4 100644 (file)
@@ -101,7 +101,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
 {
        struct hlist_nulls_node *first = h->first;
 
-       n->next = first;
+       WRITE_ONCE(n->next, first);
        WRITE_ONCE(n->pprev, &h->first);
        rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
        if (!is_a_nulls(first))
@@ -137,7 +137,7 @@ static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
                last = i;
 
        if (last) {
-               n->next = last->next;
+               WRITE_ONCE(n->next, last->next);
                n->pprev = &last->next;
                rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
        } else {
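
The WRITE_ONCE() stores above keep the ->next updates single-copy-atomic
for lockless RCU readers, pairing with the marked reads done by the
iteration macros. A reader-side sketch with placeholder types:

    struct obj {
            struct hlist_nulls_node node;
            u32 key;
    };

    struct obj *obj;
    struct hlist_nulls_node *pos;

    rcu_read_lock();
    hlist_nulls_for_each_entry_rcu(obj, pos, &head, node)
            if (obj->key == key)
                    break;  /* nulls lists may need a restart check under
                               concurrent updates; elided here */
    rcu_read_unlock();
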
index 9bc8cbb..eda4932 100644 (file)
@@ -87,6 +87,7 @@ static inline void rcu_read_unlock_trace(void)
 void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
 void synchronize_rcu_tasks_trace(void);
 void rcu_barrier_tasks_trace(void);
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void);
 #else
 /*
  * The BPF JIT forms these addresses even when it doesn't call these
index 699b938..5e0f74f 100644 (file)
@@ -42,6 +42,11 @@ do {                                                                 \
  * call_srcu() function, with this wrapper supplying the pointer to the
  * corresponding srcu_struct.
  *
+ * Note that call_rcu_hurry() should be used instead of call_rcu()
+ * because in kernels built with CONFIG_RCU_LAZY=y the delay between the
+ * invocation of call_rcu() and that of the corresponding RCU callback
+ * can be multiple seconds.
+ *
  * The first argument tells Tiny RCU's _wait_rcu_gp() not to
 * bother waiting for RCU.  The reason is that anywhere
  * synchronize_rcu_mult() can be called is automatically already a full
index fdf26cd..26b6f3c 100644 (file)
@@ -59,6 +59,7 @@ struct rethook_node {
 };
 
 struct rethook *rethook_alloc(void *data, rethook_handler_t handler);
+void rethook_stop(struct rethook *rh);
 void rethook_free(struct rethook *rh);
 void rethook_add_node(struct rethook *rh, struct rethook_node *node);
 struct rethook_node *rethook_try_get(struct rethook *rh);
index 609bde8..177b3f3 100644 (file)
@@ -75,14 +75,14 @@ struct user_event_mm;
  * Task state bitmask. NOTE! These bits are also
  * encoded in fs/proc/array.c: get_task_state().
  *
- * We have two separate sets of flags: task->state
+ * We have two separate sets of flags: task->__state
  * is about runnability, while task->exit_state are
  * about the task exiting. Confusing, but this way
  * modifying one set can't modify the other one by
  * mistake.
  */
 
-/* Used in tsk->state: */
+/* Used in tsk->__state: */
 #define TASK_RUNNING                   0x00000000
 #define TASK_INTERRUPTIBLE             0x00000001
 #define TASK_UNINTERRUPTIBLE           0x00000002
@@ -92,7 +92,7 @@ struct user_event_mm;
 #define EXIT_DEAD                      0x00000010
 #define EXIT_ZOMBIE                    0x00000020
 #define EXIT_TRACE                     (EXIT_ZOMBIE | EXIT_DEAD)
-/* Used in tsk->state again: */
+/* Used in tsk->__state again: */
 #define TASK_PARKED                    0x00000040
 #define TASK_DEAD                      0x00000080
 #define TASK_WAKEKILL                  0x00000100
@@ -173,7 +173,7 @@ struct user_event_mm;
 #endif
 
 /*
- * set_current_state() includes a barrier so that the write of current->state
+ * set_current_state() includes a barrier so that the write of current->__state
  * is correctly serialised wrt the caller's subsequent test of whether to
  * actually sleep:
  *
@@ -196,9 +196,9 @@ struct user_event_mm;
  *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * where wake_up_state()/try_to_wake_up() executes a full memory barrier before
- * accessing p->state.
+ * accessing p->__state.
  *
- * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
+ * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is,
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
@@ -549,13 +549,18 @@ struct sched_entity {
        /* For load-balancing: */
        struct load_weight              load;
        struct rb_node                  run_node;
+       u64                             deadline;
+       u64                             min_deadline;
+
        struct list_head                group_node;
        unsigned int                    on_rq;
 
        u64                             exec_start;
        u64                             sum_exec_runtime;
-       u64                             vruntime;
        u64                             prev_sum_exec_runtime;
+       u64                             vruntime;
+       s64                             vlag;
+       u64                             slice;
 
        u64                             nr_migrations;
 
@@ -2433,9 +2438,11 @@ extern void sched_core_free(struct task_struct *tsk);
 extern void sched_core_fork(struct task_struct *p);
 extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
                                unsigned long uaddr);
+extern int sched_core_idle_cpu(int cpu);
 #else
 static inline void sched_core_free(struct task_struct *tsk) { }
 static inline void sched_core_fork(struct task_struct *p) { }
+static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
 #endif
 
 extern void sched_set_stop_task(int cpu, struct task_struct *stop);
index dd35ce2..a23af22 100644 (file)
@@ -118,11 +118,47 @@ static inline struct task_struct *get_task_struct(struct task_struct *t)
 }
 
 extern void __put_task_struct(struct task_struct *t);
+extern void __put_task_struct_rcu_cb(struct rcu_head *rhp);
 
 static inline void put_task_struct(struct task_struct *t)
 {
-       if (refcount_dec_and_test(&t->usage))
+       if (!refcount_dec_and_test(&t->usage))
+               return;
+
+       /*
+        * In !RT, it is always safe to call __put_task_struct().
+        * Under RT, we can only call it in preemptible context.
+        */
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+               static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP);
+
+               lock_map_acquire_try(&put_task_map);
                __put_task_struct(t);
+               lock_map_release(&put_task_map);
+               return;
+       }
+
+       /*
+        * Under PREEMPT_RT, we can't call put_task_struct()
+        * in atomic context because it will indirectly
+        * acquire sleeping locks.
+        *
+        * call_rcu() will schedule __put_task_struct_rcu_cb()
+        * to be called in process context.
+        *
+        * __put_task_struct() is called when
+        * refcount_dec_and_test(&t->usage) succeeds.
+        *
+        * This means that it can't "conflict" with
+        * put_task_struct_rcu_user() which abuses ->rcu the same
+        * way; rcu_users has a reference so task->usage can't be
+        * zero after rcu_users 1 -> 0 transition.
+        *
+        * delayed_free_task() also uses ->rcu, but it is only called
+        * when it fails to fork a process. Therefore, there is no
+        * way it can conflict with put_task_struct().
+        */
+       call_rcu(&t->rcu, __put_task_struct_rcu_cb);
 }
 
 DEFINE_FREE(put_task, struct task_struct *, if (_T) put_task_struct(_T))
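
The __put_task_struct_rcu_cb() referenced above presumably just runs the
real destructor once the grace period has elapsed, from process context; a
sketch of its likely shape (the actual definition lives in kernel/fork.c):

    void __put_task_struct_rcu_cb(struct rcu_head *rhp)
    {
            struct task_struct *task = container_of(rhp, typeof(*task), rcu);

            __put_task_struct(task);
    }
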
index 3282850..bac98ea 100644 (file)
@@ -293,6 +293,7 @@ int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
 int security_bprm_check(struct linux_binprm *bprm);
 void security_bprm_committing_creds(struct linux_binprm *bprm);
 void security_bprm_committed_creds(struct linux_binprm *bprm);
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference);
 int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
 int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
 int security_sb_alloc(struct super_block *sb);
@@ -629,6 +630,11 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
 {
 }
 
+static inline int security_fs_context_submount(struct fs_context *fc,
+                                          struct super_block *reference)
+{
+       return 0;
+}
 static inline int security_fs_context_dup(struct fs_context *fc,
                                          struct fs_context *src_fc)
 {
index bd023dd..386ab58 100644 (file)
@@ -249,18 +249,19 @@ static inline void seq_show_option(struct seq_file *m, const char *name,
 
 /**
  * seq_show_option_n - display mount options with appropriate escapes
- *                    where @value must be a specific length.
+ *                    where @value must be a specific length (i.e.
+ *                    not NUL-terminated).
  * @m: the seq_file handle
  * @name: the mount option name
  * @value: the mount option name's value, cannot be NULL
- * @length: the length of @value to display
+ * @length: the exact length of @value to display, must be a constant expression
  *
 * This is a macro since it uses "length" to define the size of the
  * stack buffer.
  */
 #define seq_show_option_n(m, name, value, length) {    \
        char val_buf[length + 1];                       \
-       strncpy(val_buf, value, length);                \
+       memcpy(val_buf, value, length);                 \
        val_buf[length] = '\0';                         \
        seq_show_option(m, name, val_buf);              \
 }
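
Because @value is explicitly allowed to lack a NUL terminator and @length is
exact, memcpy() is the honest primitive here; strncpy() added nothing and
trips newer compiler diagnostics on possibly-unterminated buffers. A usage
sketch, with 'sbi->volname' as an illustrative 16-byte fixed-width field:

    /* @length must be a constant expression: it sizes val_buf. */
    seq_show_option_n(m, "volname", sbi->volname, 16);
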
index 6d58c57..a156d2e 100644 (file)
@@ -459,7 +459,8 @@ struct uart_port {
                                                struct serial_rs485 *rs485);
        int                     (*iso7816_config)(struct uart_port *,
                                                  struct serial_iso7816 *iso7816);
-       int                     ctrl_id;                /* optional serial core controller id */
+       unsigned int            ctrl_id;                /* optional serial core controller id */
+       unsigned int            port_id;                /* optional serial core port id */
        unsigned int            irq;                    /* irq number */
        unsigned long           irqflags;               /* irq flags  */
        unsigned int            uartclk;                /* base uart clock */
index 9029abd..6b0c626 100644 (file)
 
 /* inode in-kernel data */
 
+#ifdef CONFIG_TMPFS_QUOTA
+#define SHMEM_MAXQUOTAS 2
+#endif
+
 struct shmem_inode_info {
        spinlock_t              lock;
        unsigned int            seals;          /* shmem seals */
@@ -27,6 +31,10 @@ struct shmem_inode_info {
        atomic_t                stop_eviction;  /* hold when working on inode */
        struct timespec64       i_crtime;       /* file creation time */
        unsigned int            fsflags;        /* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+       struct dquot            *i_dquot[MAXQUOTAS];
+#endif
+       struct offset_ctx       dir_offsets;    /* stable entry offsets */
        struct inode            vfs_inode;
 };
 
@@ -35,11 +43,18 @@ struct shmem_inode_info {
        (FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
 #define SHMEM_FL_INHERITED             (FS_NODUMP_FL | FS_NOATIME_FL)
 
+struct shmem_quota_limits {
+       qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
+       qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
+       qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
+       qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
+};
+
 struct shmem_sb_info {
        unsigned long max_blocks;   /* How many blocks are allowed */
        struct percpu_counter used_blocks;  /* How many are allocated */
        unsigned long max_inodes;   /* How many inodes are allowed */
-       unsigned long free_inodes;  /* How many are left for allocation */
+       unsigned long free_ispace;  /* How much ispace left for allocation */
        raw_spinlock_t stat_lock;   /* Serialize shmem_sb_info changes */
        umode_t mode;               /* Mount mode for root directory */
        unsigned char huge;         /* Whether to try for hugepages */
@@ -53,6 +68,7 @@ struct shmem_sb_info {
        spinlock_t shrinklist_lock;   /* Protects shrinklist */
        struct list_head shrinklist;  /* List of shrinkable inodes */
        unsigned long shrinklist_len; /* Length of shrinklist */
+       struct shmem_quota_limits qlimits; /* Default quota limits */
 };
 
 static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
@@ -172,4 +188,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
 #endif /* CONFIG_SHMEM */
 #endif /* CONFIG_USERFAULTFD */
 
+/*
+ * Used space is stored as an unsigned 64-bit value in bytes, but the
+ * quota core supports only signed 64-bit values, so use that as the
+ * limit.
+ */
+#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
+#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
+
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
 #endif
index 054d791..c163751 100644 (file)
@@ -62,6 +62,7 @@ struct sk_psock_progs {
 
 enum sk_psock_state_bits {
        SK_PSOCK_TX_ENABLED,
+       SK_PSOCK_RX_STRP_ENABLED,
 };
 
 struct sk_psock_link {
index 0b85761..fc6c151 100644 (file)
@@ -15,4 +15,6 @@ struct corgi_lcd_platform_data {
        void (*kick_battery)(void);
 };
 
+void corgi_lcd_limit_intensity(int limit);
+
 #endif /* __LINUX_SPI_CORGI_LCD_H */
index 8e984d7..6b0a7dc 100644 (file)
@@ -101,6 +101,7 @@ struct spi_mem_op {
                u8 nbytes;
                u8 buswidth;
                u8 dtr : 1;
+               u8 __pad : 7;
                u16 opcode;
        } cmd;
 
@@ -108,6 +109,7 @@ struct spi_mem_op {
                u8 nbytes;
                u8 buswidth;
                u8 dtr : 1;
+               u8 __pad : 7;
                u64 val;
        } addr;
 
@@ -115,12 +117,14 @@ struct spi_mem_op {
                u8 nbytes;
                u8 buswidth;
                u8 dtr : 1;
+               u8 __pad : 7;
        } dummy;
 
        struct {
                u8 buswidth;
                u8 dtr : 1;
                u8 ecc : 1;
+               u8 __pad : 6;
                enum spi_mem_data_dir dir;
                unsigned int nbytes;
                union {
index ebd7249..4471331 100644 (file)
@@ -48,6 +48,10 @@ void srcu_drive_gp(struct work_struct *wp);
 #define DEFINE_STATIC_SRCU(name) \
        static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name, name)
 
+// Dummy structure for srcu_notifier_head.
+struct srcu_usage { };
+#define __SRCU_USAGE_INIT(name) { }
+
 void synchronize_srcu(struct srcu_struct *ssp);
 
 /*
index 6a8c22b..d324419 100644 (file)
@@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
 
 extern void swake_up_one(struct swait_queue_head *q);
 extern void swake_up_all(struct swait_queue_head *q);
-extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
 
 extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
 extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
index 03e3d01..c0cb22c 100644 (file)
@@ -284,22 +284,6 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
 #endif
 
 /*
- * Called before coming back to user-mode. Returning to user-mode with an
- * address limit different than USER_DS can allow to overwrite kernel memory.
- */
-static inline void addr_limit_user_check(void)
-{
-#ifdef TIF_FSCHECK
-       if (!test_thread_flag(TIF_FSCHECK))
-               return;
-#endif
-
-#ifdef TIF_FSCHECK
-       clear_thread_flag(TIF_FSCHECK);
-#endif
-}
-
-/*
  * These syscall function prototypes are kept in the same order as
  * include/uapi/asm-generic/unistd.h. Architecture specific entries go below,
  * followed by deprecated or obsolete system calls.
@@ -438,8 +422,10 @@ asmlinkage long sys_chdir(const char __user *filename);
 asmlinkage long sys_fchdir(unsigned int fd);
 asmlinkage long sys_chroot(const char __user *filename);
 asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
-asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
                             umode_t mode);
+asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
+                            umode_t mode, unsigned int flags);
 asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
                             gid_t group, int flag);
 asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
index b4c08ac..91a37c9 100644 (file)
@@ -513,7 +513,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
        struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
        int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);
 
-       queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
+       WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
 }
 
 static inline void tcp_move_syn(struct tcp_sock *tp,
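
The WRITE_ONCE() above annotates a data race: fastopenq.max_qlen is read
locklessly on connection-setup paths, which pair with it roughly as below
(sketch, cf. net/ipv4/tcp_fastopen.c):

    if (fastopenq->qlen >= READ_ONCE(fastopenq->max_qlen))
            goto drop;      /* fastopen queue full */
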
index 8783709..b449a46 100644 (file)
@@ -81,11 +81,13 @@ struct thermal_zone_device_ops {
  * @temperature: temperature value in millicelsius
  * @hysteresis: relative hysteresis in millicelsius
  * @type: trip point type
+ * @priv: pointer to driver data associated with this trip
  */
 struct thermal_trip {
        int temperature;
        int hysteresis;
        enum thermal_trip_type type;
+       void *priv;
 };
 
 struct thermal_cooling_device_ops {
@@ -287,6 +289,9 @@ int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
                          const struct thermal_trip *trip);
 
+int for_each_thermal_trip(struct thermal_zone_device *tz,
+                         int (*cb)(struct thermal_trip *, void *),
+                         void *data);
 int thermal_zone_get_num_trips(struct thermal_zone_device *tz);
 
 int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);
@@ -301,14 +306,14 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
 #ifdef CONFIG_THERMAL
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
                void *, struct thermal_zone_device_ops *,
-               struct thermal_zone_params *, int, int);
+               const struct thermal_zone_params *, int, int);
 
 void thermal_zone_device_unregister(struct thermal_zone_device *);
 
 struct thermal_zone_device *
 thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int,
                                        void *, struct thermal_zone_device_ops *,
-                                       struct thermal_zone_params *, int, int);
+                                       const struct thermal_zone_params *, int, int);
 
 void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
 const char *thermal_zone_device_type(struct thermal_zone_device *tzd);
@@ -323,6 +328,10 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int,
                                       struct thermal_cooling_device *);
 void thermal_zone_device_update(struct thermal_zone_device *,
                                enum thermal_notify_event);
+void thermal_zone_device_exec(struct thermal_zone_device *tz,
+                             void (*cb)(struct thermal_zone_device *,
+                                        unsigned long),
+                             unsigned long data);
 
 struct thermal_cooling_device *thermal_cooling_device_register(const char *,
                void *, const struct thermal_cooling_device_ops *);
@@ -348,7 +357,7 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz);
 static inline struct thermal_zone_device *thermal_zone_device_register(
        const char *type, int trips, int mask, void *devdata,
        struct thermal_zone_device_ops *ops,
-       struct thermal_zone_params *tzp,
+       const struct thermal_zone_params *tzp,
        int passive_delay, int polling_delay)
 { return ERR_PTR(-ENODEV); }
 static inline void thermal_zone_device_unregister(
index 7038104..bb466ee 100644 (file)
@@ -108,12 +108,15 @@ bool torture_must_stop(void);
 bool torture_must_stop_irq(void);
 void torture_kthread_stopping(char *title);
 int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
-                            char *f, struct task_struct **tp);
+                            char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp));
 void _torture_stop_kthread(char *m, struct task_struct **tp);
 
 #define torture_create_kthread(n, arg, tp) \
        _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
-                               "Failed to create " #n, &(tp))
+                               "Failed to create " #n, &(tp), NULL)
+#define torture_create_kthread_cb(n, arg, tp, cbf) \
+       _torture_create_kthread(n, (arg), #n, "Creating " #n " task", \
+                               "Failed to create " #n, &(tp), cbf)
 #define torture_stop_kthread(n, tp) \
        _torture_stop_kthread("Stopping " #n " task", &(tp))
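
The extra argument threads an optional callback on the created task through kthread creation; torture_create_kthread() keeps its old behaviour by passing NULL. A hedged sketch of the new variant, with the task function and callback invented for illustration (when exactly the callback fires, e.g. right after creation for per-task setup, is this sketch's assumption):

    /* Assumed: runs on the newly created kthread's task_struct. */
    static void example_cb(struct task_struct *tp)
    {
            set_user_nice(tp, -10);         /* example per-task setup */
    }

    static int example_torture_fn(void *arg)
    {
            do {
                    schedule_timeout_interruptible(HZ / 10);
            } while (!torture_must_stop());
            torture_kthread_stopping("example_torture_fn");
            return 0;
    }

    /* torture_create_kthread_cb(example_torture_fn, NULL, example_task, example_cb); */
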
 
index 6a1e8f1..4ee9d13 100644 (file)
@@ -283,6 +283,7 @@ enum tpm_chip_flags {
        TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED    = BIT(6),
        TPM_CHIP_FLAG_FIRMWARE_UPGRADE          = BIT(7),
        TPM_CHIP_FLAG_SUSPENDED                 = BIT(8),
+       TPM_CHIP_FLAG_HWRNG_DISABLED            = BIT(9),
 };
 
 #define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
index 3930e67..1e8bbdb 100644 (file)
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
 extern __printf(2, 3)
 void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
 
+/* Used to find the offset and length of dynamic fields in trace events */
+struct trace_dynamic_info {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+       u16     offset;
+       u16     len;
+#else
+       u16     len;
+       u16     offset;
+#endif
+};
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
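
The two field orders exist so the struct can overlay the packed 32-bit offset/length word that trace events store for dynamic fields on either endianness. A reader-side sketch, treating the raw descriptor as an opaque u32:

    union {
            u32                             packed;
            struct trace_dynamic_info       dyn;
    } desc;

    desc.packed = raw_descriptor;   /* the event's 32-bit dynamic-field word */
    pr_debug("dynamic data at offset %u, length %u\n",
             desc.dyn.offset, desc.dyn.len);
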
index ff81e5c..42bce38 100644 (file)
@@ -163,7 +163,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
        return ret;
 }
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
                                  size_t bytes, struct iov_iter *i);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
@@ -184,6 +184,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
 {
        return copy_page_to_iter(&folio->page, offset, bytes, i);
 }
+
+static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
+               size_t offset, size_t bytes, struct iov_iter *i)
+{
+       return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
+}
+
 size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
                                 size_t bytes, struct iov_iter *i);
 
index bdf8de2..7b4dd69 100644 (file)
@@ -155,6 +155,10 @@ retry:
                if (gso_type & SKB_GSO_UDP)
                        nh_off -= thlen;
 
+               /* Kernel has a special handling for GSO_BY_FRAGS. */
+               if (gso_size == GSO_BY_FRAGS)
+                       return -EINVAL;
+
                /* Too small packets are not really GSO ones. */
                if (skb->len - nh_off > gso_size) {
                        shinfo->gso_size = gso_size;
index a0307b5..5ec7739 100644 (file)
@@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
 }
 
 int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
 void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
                unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
 #define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
 #define wake_up_poll(x, m)                                                     \
        __wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
+#define wake_up_poll_on_current_cpu(x, m)                                      \
+       __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
 #define wake_up_locked_poll(x, m)                                              \
        __wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
 #define wake_up_interruptible_poll(x, m)                                       \
index fba9379..083387c 100644 (file)
@@ -375,11 +375,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
                             pgoff_t start, pgoff_t end);
 
 bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
-void folio_account_redirty(struct folio *folio);
-static inline void account_page_redirty(struct page *page)
-{
-       folio_account_redirty(page_folio(page));
-}
 bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
 bool redirty_page_for_writepage(struct writeback_control *, struct page *);
 
index d591ef5..d200518 100644 (file)
@@ -114,13 +114,15 @@ struct simple_xattr {
 };
 
 void simple_xattrs_init(struct simple_xattrs *xattrs);
-void simple_xattrs_free(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+size_t simple_xattr_space(const char *name, size_t size);
 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
+void simple_xattr_free(struct simple_xattr *xattr);
 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
                     void *buffer, size_t size);
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
-                    const void *value, size_t size, int flags,
-                    ssize_t *removed_size);
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+                                     const char *name, const void *value,
+                                     size_t size, int flags);
 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
                          char *buffer, size_t size);
 void simple_xattr_add(struct simple_xattrs *xattrs,
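
simple_xattr_set() now hands back the entry it displaced (if any) instead of reporting freed space through an out-parameter, leaving accounting and freeing to the caller; simple_xattr_space() supplies the size estimate for that accounting. A sketch of the assumed calling convention (error-pointer semantics and NULL-safety of the free are assumptions):

    struct simple_xattr *old;

    old = simple_xattr_set(xattrs, name, value, size, flags);
    if (IS_ERR(old))
            return PTR_ERR(old);

    /* charge/uncharge simple_xattr_space() deltas here, then: */
    simple_xattr_free(old);         /* assumed NULL-safe when nothing was replaced */
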
index 9654567..e01d52c 100644 (file)
@@ -593,9 +593,7 @@ struct hci_dev {
        const char              *fw_info;
        struct dentry           *debugfs;
 
-#ifdef CONFIG_DEV_COREDUMP
        struct hci_devcoredump  dump;
-#endif
 
        struct device           dev;
 
@@ -822,6 +820,7 @@ struct hci_conn_params {
 
        struct hci_conn *conn;
        bool explicit_connect;
+       /* Accessed without hdev->lock: */
        hci_conn_flags_t flags;
        u8  privacy_mode;
 };
@@ -1573,7 +1572,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
                                            bdaddr_t *addr, u8 addr_type);
 void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type);
 void hci_conn_params_clear_disabled(struct hci_dev *hdev);
+void hci_conn_params_free(struct hci_conn_params *param);
 
+void hci_pend_le_list_del_init(struct hci_conn_params *param);
+void hci_pend_le_list_add(struct hci_conn_params *param,
+                         struct list_head *list);
 struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
                                                  bdaddr_t *addr,
                                                  u8 addr_type);
index b57bec6..5b8b1b6 100644 (file)
@@ -277,7 +277,7 @@ struct bond_vlan_tag {
        unsigned short  vlan_id;
 };
 
-/**
+/*
  * Returns NULL if the net_device does not belong to any of the bond's slaves
  *
  * Caller must hold bond lock for read
@@ -722,23 +722,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
 }
 
 /* Caller must hold rcu_read_lock() for read */
-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
+static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac)
 {
        struct list_head *iter;
        struct slave *tmp;
-       struct netdev_hw_addr *ha;
 
        bond_for_each_slave_rcu(bond, tmp, iter)
                if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
                        return true;
-
-       if (netdev_uc_empty(bond->dev))
-               return false;
-
-       netdev_for_each_uc_addr(ha, bond->dev)
-               if (ether_addr_equal_64bits(mac, ha->addr))
-                       return true;
-
        return false;
 }
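
The rename makes the locking rule part of the name: the helper now only walks the slave list under RCU and no longer consults the bond device's own unicast list. Typical call pattern, per the comment above it:

    bool has_mac;

    rcu_read_lock();
    has_mac = bond_slave_has_mac_rcu(bond, mac);
    rcu_read_unlock();
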
 
index 7c7d03a..d6fa7c8 100644 (file)
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
        if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
                return NULL;
 
+       if (iftype == NL80211_IFTYPE_AP_VLAN)
+               iftype = NL80211_IFTYPE_AP;
+
        for (i = 0; i < sband->n_iftype_data; i++)  {
                const struct ieee80211_sband_iftype_data *data =
                        &sband->iftype_data[i];
index e000579..f79ce13 100644 (file)
@@ -170,7 +170,8 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b)
 }
 
 /**
- * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support
+ * enum wpan_phy_flags - WPAN PHY state flags
+ * @WPAN_PHY_FLAG_TXPOWER: Indicates that transceiver will support
  *     transmit power setting.
  * @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed
  *     level setting.
index 5fed2f1..aa80f74 100644 (file)
@@ -145,8 +145,8 @@ struct codel_vars {
  * @maxpacket: largest packet we've seen so far
  * @drop_count:        temp count of dropped packets in dequeue()
  * @drop_len:  bytes of dropped packets in dequeue()
- * ecn_mark:   number of packets we ECN marked instead of dropping
- * ce_mark:    number of packets CE marked because sojourn time was above ce_threshold
+ * @ecn_mark:  number of packets we ECN marked instead of dropping
+ * @ce_mark:   number of packets CE marked because sojourn time was above ce_threshold
  */
 struct codel_stats {
        u32             maxpacket;
index 9a3c51a..0cdb4b1 100644 (file)
@@ -221,7 +221,7 @@ struct devlink_dpipe_field {
 /**
  * struct devlink_dpipe_header - dpipe header object
  * @name: header name
- * @id: index, global/local detrmined by global bit
+ * @id: index, global/local determined by global bit
  * @fields: fields
  * @fields_count: number of fields
  * @global: indicates if header is shared like most protocol header
@@ -241,7 +241,7 @@ struct devlink_dpipe_header {
  * @header_index: header index (packets can have several headers of same
  *               type like in case of tunnels)
  * @header: header
- * @fieled_id: field index
+ * @field_id: field index
  */
 struct devlink_dpipe_match {
        enum devlink_dpipe_match_type type;
@@ -256,7 +256,7 @@ struct devlink_dpipe_match {
  * @header_index: header index (packets can have several headers of same
  *               type like in case of tunnels)
  * @header: header
- * @fieled_id: field index
+ * @field_id: field index
  */
 struct devlink_dpipe_action {
        enum devlink_dpipe_action_type type;
@@ -292,7 +292,7 @@ struct devlink_dpipe_value {
  * struct devlink_dpipe_entry - table entry object
  * @index: index of the entry in the table
  * @match_values: match values
- * @matche_values_count: count of matches tuples
+ * @match_values_count: count of matches tuples
  * @action_values: actions values
  * @action_values_count: count of actions values
  * @counter: value of counter
@@ -342,7 +342,9 @@ struct devlink_dpipe_table_ops;
  */
 struct devlink_dpipe_table {
        void *priv;
+       /* private: */
        struct list_head list;
+       /* public: */
        const char *name;
        bool counters_enabled;
        bool counter_control_extern;
@@ -355,13 +357,13 @@ struct devlink_dpipe_table {
 
 /**
  * struct devlink_dpipe_table_ops - dpipe_table ops
- * @actions_dump - dumps all tables actions
- * @matches_dump - dumps all tables matches
- * @entries_dump - dumps all active entries in the table
- * @counters_set_update - when changing the counter status hardware sync
+ * @actions_dump: dumps all tables actions
+ * @matches_dump: dumps all tables matches
+ * @entries_dump: dumps all active entries in the table
+ * @counters_set_update: when changing the counter status hardware sync
  *                       may be needed to allocate/free counter related
  *                       resources
- * @size_get - get size
+ * @size_get: get size
  */
 struct devlink_dpipe_table_ops {
        int (*actions_dump)(void *priv, struct sk_buff *skb);
@@ -374,8 +376,8 @@ struct devlink_dpipe_table_ops {
 
 /**
  * struct devlink_dpipe_headers - dpipe headers
- * @headers - header array can be shared (global bit) or driver specific
- * @headers_count - count of headers
+ * @headers: header array can be shared (global bit) or driver specific
+ * @headers_count: count of headers
  */
 struct devlink_dpipe_headers {
        struct devlink_dpipe_header **headers;
@@ -387,7 +389,7 @@ struct devlink_dpipe_headers {
  * @size_min: minimum size which can be set
  * @size_max: maximum size which can be set
  * @size_granularity: size granularity
- * @size_unit: resource's basic unit
+ * @unit: resource's basic unit
  */
 struct devlink_resource_size_params {
        u64 size_min;
@@ -457,6 +459,7 @@ struct devlink_flash_notify {
 
 /**
  * struct devlink_param - devlink configuration parameter data
+ * @id: devlink parameter id number
  * @name: name of the parameter
  * @generic: indicates if the parameter is generic or driver specific
  * @type: parameter type
@@ -632,6 +635,7 @@ enum devlink_param_generic_id {
  * struct devlink_flash_update_params - Flash Update parameters
  * @fw: pointer to the firmware data to update from
  * @component: the flash component to update
+ * @overwrite_mask: which types of flash update are supported (may be %0)
  *
  * With the exception of fw, drivers must opt-in to parameters by
  * setting the appropriate bit in the supported_flash_update_params field in
index 75efa6f..88644b3 100644 (file)
@@ -452,6 +452,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
                gro_normal_list(napi);
 }
 
+/* This function is an alternative to 'inet_iif' and 'inet_sdif'
+ * for cases where we cannot rely on the fields of IPCB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+       *iif = inet_iif(skb) ?: skb->dev->ifindex;
+       *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+       if (netif_is_l3_slave(skb->dev)) {
+               struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+               *sdif = *iif;
+               *iif = master ? master->ifindex : 0;
+       }
+#endif
+}
+
+/* This function is an alternative to 'inet6_iif' and 'inet6_sdif'
+ * for cases where we cannot rely on the fields of IP6CB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+       /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
+       *iif = skb->dev->ifindex;
+       *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+       if (netif_is_l3_slave(skb->dev)) {
+               struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+               *sdif = *iif;
+               *iif = master ? master->ifindex : 0;
+       }
+#endif
+}
+
 extern struct list_head offload_base;
 
 #endif /* _NET_IPV6_GRO_H */
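
These helpers centralize the iif/sdif derivation for GRO paths where the IPCB/IP6CB control blocks cannot be trusted. A sketch of the expected call site:

    int iif, sdif;

    rcu_read_lock();        /* required: may follow the l3mdev upper dev */
    inet_get_iif_sdif(skb, &iif, &sdif);
    /* ... e.g. feed iif/sdif into a socket lookup ... */
    rcu_read_unlock();
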
index 325ad89..1539606 100644 (file)
@@ -29,7 +29,7 @@ struct fqdir {
 };
 
 /**
- * fragment queue flags
+ * enum: fragment queue flags
  *
  * @INET_FRAG_FIRST_IN: first fragment has arrived
  * @INET_FRAG_LAST_IN: final fragment has arrived
index caa20a9..491ceb7 100644 (file)
@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
 
 static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
 {
-       if (!sk->sk_mark &&
-           READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+       u32 mark = READ_ONCE(sk->sk_mark);
+
+       if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
                return skb->mark;
 
-       return sk->sk_mark;
+       return mark;
 }
 
 static inline int inet_request_bound_dev_if(const struct sock *sk,
@@ -221,8 +222,8 @@ struct inet_sock {
        __s16                   uc_ttl;
        __u16                   cmsg_flags;
        struct ip_options_rcu __rcu     *inet_opt;
+       atomic_t                inet_id;
        __be16                  inet_sport;
-       __u16                   inet_id;
 
        __u8                    tos;
        __u8                    min_ttl;
index 50d4358..19adacd 100644 (file)
@@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
 {
        ipcm_init(ipcm);
 
-       ipcm->sockc.mark = inet->sk.sk_mark;
+       ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
        ipcm->sockc.tsflags = inet->sk.sk_tsflags;
        ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
        ipcm->addr = inet->inet_saddr;
@@ -538,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
         * generator as much as we can.
         */
        if (sk && inet_sk(sk)->inet_daddr) {
-               iph->id = htons(inet_sk(sk)->inet_id);
-               inet_sk(sk)->inet_id += segs;
+               int val;
+
+               /* avoid atomic operations for TCP,
+                * as we hold socket lock at this point.
+                */
+               if (sk_is_tcp(sk)) {
+                       sock_owned_by_me(sk);
+                       val = atomic_read(&inet_sk(sk)->inet_id);
+                       atomic_set(&inet_sk(sk)->inet_id, val + segs);
+               } else {
+                       val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+               }
+               iph->id = htons(val);
                return;
        }
        if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
index 7332296..2acc4c8 100644 (file)
@@ -752,12 +752,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
 /* more secured version of ipv6_addr_hash() */
 static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
 {
-       u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
-
-       return jhash_3words(v,
-                           (__force u32)a->s6_addr32[2],
-                           (__force u32)a->s6_addr32[3],
-                           initval);
+       return jhash2((__force const u32 *)a->s6_addr32,
+                     ARRAY_SIZE(a->s6_addr32), initval);
 }
 
 static inline bool ipv6_addr_loopback(const struct in6_addr *a)
index 2c1ea34..374411b 100644 (file)
@@ -111,7 +111,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
 void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
 int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked);
 struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr,
-                                   struct llc_addr *laddr);
+                                   struct llc_addr *laddr, const struct net *net);
 void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk);
 void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk);
 
index 49aa79c..7e73f8e 100644 (file)
@@ -269,7 +269,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
 /**
  *     llc_pdu_decode_da - extracts dest address of input frame
  *     @skb: input skb that destination address must be extracted from it
- *     @sa: pointer to destination address (6 byte array).
+ *     @da: pointer to destination address (6 byte array).
  *
  *     This function extracts destination address(MAC) of input frame.
  */
@@ -321,7 +321,7 @@ static inline void llc_pdu_init_as_ui_cmd(struct sk_buff *skb)
 
 /**
  *     llc_pdu_init_as_test_cmd - sets PDU as TEST
- *     @skb - Address of the skb to build
+ *     @skb: Address of the skb to build
  *
  *     Sets a PDU as TEST
  */
@@ -369,6 +369,8 @@ struct llc_xid_info {
 /**
  *     llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID
  *     @skb: input skb that header must be set into it.
+ *     @svcs_supported: The class of the LLC (I or II)
+ *     @rx_window: The size of the receive window of the LLC
  *
  *     This function sets third,fourth,fifth and sixth bytes of LLC header as
  *     a XID PDU.
index 3a8a2d2..2a55ae9 100644 (file)
@@ -6612,6 +6612,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
  * marks frames marked in the bitmap as having been filtered. Afterwards, it
  * checks if any frames in the window starting from @ssn can now be released
  * (in case they were only waiting for frames that were filtered.)
+ * (Only works correctly if @max_rx_aggregation_subframes <= 64 frames)
  */
 void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
                                          u16 ssn, u64 filtered,
index 9334371..f7dd950 100644 (file)
@@ -67,6 +67,9 @@ struct nf_conntrack_tuple {
                /* The protocol. */
                u_int8_t protonum;
 
+               /* The direction must be ignored for the tuplehash */
+               struct { } __nfct_hash_offsetend;
+
                /* The direction (for tuplehash) */
                u_int8_t dir;
        } dst;
index 84f2fd8..dd40c75 100644 (file)
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
  *
  *     @list: table set list node
  *     @bindings: list of set bindings
+ *     @refs: internal refcounting for async set destruction
  *     @table: table this set belongs to
  *     @net: netnamespace this set belongs to
  *     @name: name of the set
@@ -533,6 +534,7 @@ struct nft_set_elem_expr {
  *     @expr: stateful expression
  *     @ops: set ops
  *     @flags: set flags
+ *     @dead: set will be freed, never cleared
  *     @genmask: generation mask
  *     @klen: key length
  *     @dlen: data length
@@ -541,6 +543,7 @@ struct nft_set_elem_expr {
 struct nft_set {
        struct list_head                list;
        struct list_head                bindings;
+       refcount_t                      refs;
        struct nft_table                *table;
        possible_net_t                  net;
        char                            *name;
@@ -562,7 +565,8 @@ struct nft_set {
        struct list_head                pending_update;
        /* runtime data below here */
        const struct nft_set_ops        *ops ____cacheline_aligned;
-       u16                             flags:14,
+       u16                             flags:13,
+                                       dead:1,
                                        genmask:2;
        u8                              klen;
        u8                              dlen;
@@ -583,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
        return (void *)set->data;
 }
 
+static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+{
+       return refcount_read(&s->refs) != 1;
+}
+
 static inline struct nft_set *nft_set_container_of(const void *priv)
 {
        return (void *)priv - offsetof(struct nft_set, data);
@@ -596,7 +605,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
 
 struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
                                            const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
@@ -813,62 +821,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
 void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
                                const struct nft_set *set, void *elem);
 
-/**
- *     struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- *     @rcu: rcu head
- *     @set: set the elements belong to
- *     @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
-       struct rcu_head                 rcu;
-       const struct nft_set            *set;
-       unsigned int                    cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE  ((PAGE_SIZE -                             \
-                                 sizeof(struct nft_set_gc_batch_head)) / \
-                                sizeof(void *))
-
-/**
- *     struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- *     @head: GC batch head
- *     @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
-       struct nft_set_gc_batch_head    head;
-       void                            *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
-                                               gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
-       if (gcb != NULL)
-               call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
-                      gfp_t gfp)
-{
-       if (gcb != NULL) {
-               if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
-                       return gcb;
-               nft_set_gc_batch_complete(gcb);
-       }
-       return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
-                                       void *elem)
-{
-       gcb->elems[gcb->head.cnt++] = elem;
-}
-
 struct nft_expr_ops;
 /**
  *     struct nft_expr_type - nf_tables expression type
@@ -1211,6 +1163,29 @@ int __nft_release_basechain(struct nft_ctx *ctx);
 
 unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv);
 
+static inline bool nft_use_inc(u32 *use)
+{
+       if (*use == UINT_MAX)
+               return false;
+
+       (*use)++;
+
+       return true;
+}
+
+static inline void nft_use_dec(u32 *use)
+{
+       WARN_ON_ONCE((*use)-- == 0);
+}
+
+/* For error and abort path: restore use counter to previous state. */
+static inline void nft_use_inc_restore(u32 *use)
+{
+       WARN_ON_ONCE(!nft_use_inc(use));
+}
+
+#define nft_use_dec_restore    nft_use_dec
+
 /**
  *     struct nft_table - nf_tables table
  *
@@ -1296,8 +1271,8 @@ struct nft_object {
        struct list_head                list;
        struct rhlist_head              rhlhead;
        struct nft_object_hash_key      key;
-       u32                             genmask:2,
-                                       use:30;
+       u32                             genmask:2;
+       u32                             use;
        u64                             handle;
        u16                             udlen;
        u8                              *udata;
@@ -1399,8 +1374,8 @@ struct nft_flowtable {
        char                            *name;
        int                             hooknum;
        int                             ops_len;
-       u32                             genmask:2,
-                                       use:30;
+       u32                             genmask:2;
+       u32                             use;
        u64                             handle;
        /* runtime data below here */
        struct list_head                hook_list ____cacheline_aligned;
@@ -1534,39 +1509,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
 
 #endif /* IS_ENABLED(CONFIG_NF_TABLES) */
 
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
 
 #if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT  2
+#define NFT_SET_ELEM_DEAD_BIT  2
 #elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
 #else
 #error
 #endif
 
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
 {
        unsigned long *word = (unsigned long *)ext;
 
        BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
-       return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+       set_bit(NFT_SET_ELEM_DEAD_BIT, word);
 }
 
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
 {
        unsigned long *word = (unsigned long *)ext;
 
-       clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+       BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+       return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
 }
 
 /**
@@ -1709,6 +1675,38 @@ struct nft_trans_flowtable {
 #define nft_trans_flowtable_flags(trans)       \
        (((struct nft_trans_flowtable *)trans->data)->flags)
 
+#define NFT_TRANS_GC_BATCHCOUNT        256
+
+struct nft_trans_gc {
+       struct list_head        list;
+       struct net              *net;
+       struct nft_set          *set;
+       u32                     seq;
+       u8                      count;
+       void                    *priv[NFT_TRANS_GC_BATCHCOUNT];
+       struct rcu_head         rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+                                       unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                          unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                struct nft_set_elem *elem);
+
 int __init nft_chain_filter_init(void);
 void nft_chain_filter_fini(void);
 
@@ -1735,6 +1733,8 @@ struct nftables_pernet {
        struct mutex            commit_mutex;
        u64                     table_handle;
        unsigned int            base_seq;
+       unsigned int            gc_seq;
+       u8                      validate_state;
 };
 
 extern unsigned int nf_tables_net_id;
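
The bitfield-to-u32 conversion of the object and flowtable use counters pairs with nft_use_inc()/nft_use_dec(): overflow is refused at the increment site instead of silently wrapping a 30-bit field. The intended pattern, roughly (the error code and field name are assumptions):

    if (!nft_use_inc(&obj->use))
            return -EMFILE;         /* counter would overflow */

    err = do_setup();
    if (err) {
            nft_use_dec_restore(&obj->use); /* unwind on the abort path */
            return err;
    }
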
index 350b1ad..16a7510 100644 (file)
 
 /**
  * struct nsh_md1_ctx - Keeps track of NSH context data
- * @nshc<1-4>: NSH Contexts.
+ * @context: NSH Contexts.
  */
 struct nsh_md1_ctx {
        __be32 context[4];
index 3fe2361..01cbc66 100644 (file)
@@ -17,7 +17,7 @@
 /**
  * struct pie_params - contains pie parameters
  * @target:            target delay in pschedtime
- * @tudpate:           interval at which drop probability is calculated
+ * @tupdate:           interval at which drop probability is calculated
  * @limit:             total number of packets that can be in the queue
  * @alpha:             parameter to control drop probability
  * @beta:              parameter to control drop probability
index e98aac9..1596056 100644 (file)
@@ -134,7 +134,7 @@ extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
  */
 static inline unsigned int psched_mtu(const struct net_device *dev)
 {
-       return dev->mtu + dev->hard_header_len;
+       return READ_ONCE(dev->mtu) + dev->hard_header_len;
 }
 
 static inline struct net *qdisc_net(struct Qdisc *q)
index 5a5c726..8c2a8e7 100644 (file)
@@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
                                                   __be16 dport, __be16 sport,
                                                   __u8 proto, __u8 tos, int oif)
 {
-       flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
+       flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
                           RT_SCOPE_UNIVERSE, proto,
                           sk ? inet_sk_flowi_flags(sk) : 0,
                           daddr, saddr, dport, sport, sock_net_uid(net, sk));
@@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
        if (inet_sk(sk)->transparent)
                flow_flags |= FLOWI_FLAG_ANYSRC;
 
-       flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+       flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
                           ip_sock_rt_scope(sk), protocol, flow_flags, dst,
                           src, dport, sport, sk->sk_uid);
 }
index 040f07b..b228376 100644 (file)
@@ -1,4 +1,4 @@
-/**
+/*
  * Copyright (c) 2017 Redpine Signals Inc.
  *
  * Permission to use, copy, modify, and/or distribute this software for any
index d9076a7..6506221 100644 (file)
@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *
 int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
                        u32 portid, const struct nlmsghdr *nlh);
 
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
-                       struct netlink_ext_ack *exterr);
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+                            struct netlink_ext_ack *exterr);
 struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
 
 #define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
index 2eb916d..690e221 100644 (file)
@@ -1323,6 +1323,7 @@ struct proto {
        /*
         * Pressure flag: try to collapse.
         * Technical note: it is used by multiple contexts non atomically.
+        * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
         * All the __sk_mem_schedule() is of this nature: accounting
         * is strict, actions are advisory and have some latency.
         */
@@ -1420,6 +1421,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
        return sk->sk_prot->memory_pressure != NULL;
 }
 
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+       return sk->sk_prot->memory_pressure &&
+               !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
 static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
        if (!sk->sk_prot->memory_pressure)
@@ -1429,7 +1436,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
            mem_cgroup_under_socket_pressure(sk->sk_memcg))
                return true;
 
-       return !!*sk->sk_prot->memory_pressure;
+       return !!READ_ONCE(*sk->sk_prot->memory_pressure);
 }
 
 static inline long
@@ -1506,7 +1513,7 @@ proto_memory_pressure(struct proto *prot)
 {
        if (!prot->memory_pressure)
                return false;
-       return !!*prot->memory_pressure;
+       return !!READ_ONCE(*prot->memory_pressure);
 }
 
 
index 226bce6..0ca972e 100644 (file)
@@ -1509,25 +1509,38 @@ void tcp_leave_memory_pressure(struct sock *sk);
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
+       int val;
 
-       return tp->keepalive_intvl ? :
-               READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+       /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
+        * and do_tcp_setsockopt().
+        */
+       val = READ_ONCE(tp->keepalive_intvl);
+
+       return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
 }
 
 static inline int keepalive_time_when(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
+       int val;
 
-       return tp->keepalive_time ? :
-               READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+       /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
+       val = READ_ONCE(tp->keepalive_time);
+
+       return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
 }
 
 static inline int keepalive_probes(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
+       int val;
 
-       return tp->keepalive_probes ? :
-               READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+       /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
+        * and do_tcp_setsockopt().
+        */
+       val = READ_ONCE(tp->keepalive_probes);
+
+       return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
 }
 
 static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -2048,7 +2061,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
 static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
 {
        struct net *net = sock_net((struct sock *)tp);
-       return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+       u32 val;
+
+       val = READ_ONCE(tp->notsent_lowat);
+
+       return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
 }
 
 bool tcp_stream_memory_free(const struct sock *sk, int wake);
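
All four getters now snapshot the per-socket value once because the setters can race with them; the comments name the writers they pair with. The writer side looks roughly like this (a sketch of the pairing, not the exact setter body):

    /* e.g. in a keepalive setter, paired with the READ_ONCE() above */
    WRITE_ONCE(tp->keepalive_intvl, val * HZ);
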
index 0be91ca..6a9f8a5 100644 (file)
@@ -386,10 +386,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
        return features;
 }
 
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
-/* IPv6 header + UDP + VXLAN + Ethernet header */
-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
+static inline int vxlan_headroom(u32 flags)
+{
+       /* VXLAN:     IP4/6 header + UDP + VXLAN + Ethernet header */
+       /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */
+       return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) :
+                                      sizeof(struct iphdr)) +
+              sizeof(struct udphdr) + sizeof(struct vxlanhdr) +
+              (flags & VXLAN_F_GPE ? 0 : ETH_HLEN);
+}
 
 static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
 {
@@ -551,12 +556,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
 }
 
 static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
-                                           int hash,
+                                           u32 hash,
                                            struct vxlan_rdst *rdst)
 {
        struct fib_nh_common *nhc;
 
-       nhc = nexthop_path_fdb_result(nh, hash);
+       nhc = nexthop_path_fdb_result(nh, hash >> 1);
        if (unlikely(!nhc))
                return false;
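
Replacing the fixed VXLAN_HEADROOM/VXLAN6_HEADROOM constants with vxlan_headroom() lets the headroom reflect the actual configuration: GPE mode carries no inner Ethernet header, and the IP header size follows the address family. For example (the device private field names are assumptions):

    /* when sizing needed_headroom over a lower device */
    netdev->needed_headroom = lowerdev->hard_header_len +
                              vxlan_headroom(vxlan->cfg.flags);
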
 
index 151ca95..363c7d5 100644 (file)
@@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
                if (dev->xfrmdev_ops->xdo_dev_state_free)
                        dev->xfrmdev_ops->xdo_dev_state_free(x);
                xso->dev = NULL;
+               xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
                netdev_put(dev, &xso->dev_tracker);
        }
 }
index 75b2235..b9230b6 100644 (file)
@@ -194,6 +194,7 @@ struct scsi_device {
        unsigned no_start_on_add:1;     /* do not issue start on add */
        unsigned allow_restart:1; /* issue START_UNIT in error handler */
        unsigned manage_start_stop:1;   /* Let HLD (sd) manage start/stop */
+       unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */
        unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
        unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
        unsigned select_no_atn:1;
index 22aae50..a8c2817 100644 (file)
@@ -663,6 +663,7 @@ struct ocelot_ops {
                              struct flow_stats *stats);
        void (*cut_through_fwd)(struct ocelot *ocelot);
        void (*tas_clock_adjust)(struct ocelot *ocelot);
+       void (*tas_guard_bands_update)(struct ocelot *ocelot, int port);
        void (*update_stats)(struct ocelot *ocelot);
 };
 
@@ -863,12 +864,12 @@ struct ocelot {
        struct mutex                    stat_view_lock;
        /* Lock for serializing access to the MAC table */
        struct mutex                    mact_lock;
-       /* Lock for serializing forwarding domain changes */
+       /* Lock for serializing forwarding domain changes, including the
+        * configuration of the Time-Aware Shaper, MAC Merge layer and
+        * cut-through forwarding, on which it depends
+        */
        struct mutex                    fwd_domain_lock;
 
-       /* Lock for serializing Time-Aware Shaper changes */
-       struct mutex                    tas_lock;
-
        struct workqueue_struct         *owq;
 
        u8                              ptp:1;
index fc30014..a5ef849 100644 (file)
@@ -175,6 +175,9 @@ struct tegra_mc_icc_ops {
        int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak);
 };
 
+struct icc_node *tegra_mc_icc_xlate(struct of_phandle_args *spec, void *data);
+extern const struct tegra_mc_icc_ops tegra_mc_icc_ops;
+
 struct tegra_mc_ops {
        /*
         * @probe: Callback to set up SoC-specific bits of the memory controller. This is called
index a8206f5..b2db2c2 100644 (file)
@@ -38,7 +38,6 @@ struct find_free_extent_ctl;
        __print_symbolic(type,                                          \
                { BTRFS_TREE_BLOCK_REF_KEY,     "TREE_BLOCK_REF" },     \
                { BTRFS_EXTENT_DATA_REF_KEY,    "EXTENT_DATA_REF" },    \
-               { BTRFS_EXTENT_REF_V0_KEY,      "EXTENT_REF_V0" },      \
                { BTRFS_SHARED_BLOCK_REF_KEY,   "SHARED_BLOCK_REF" },   \
                { BTRFS_SHARED_DATA_REF_KEY,    "SHARED_DATA_REF" })
 
@@ -2482,7 +2481,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
                __entry->offset, __entry->opf, __entry->physical, __entry->len)
 );
 
-DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
+DEFINE_EVENT(btrfs_raid56_bio, raid56_read,
        TP_PROTO(const struct btrfs_raid_bio *rbio,
                 const struct bio *bio,
                 const struct raid56_bio_trace_info *trace_info),
@@ -2490,32 +2489,7 @@ DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
        TP_ARGS(rbio, bio, trace_info)
 );
 
-DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe,
-       TP_PROTO(const struct btrfs_raid_bio *rbio,
-                const struct bio *bio,
-                const struct raid56_bio_trace_info *trace_info),
-
-       TP_ARGS(rbio, bio, trace_info)
-);
-
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe,
-       TP_PROTO(const struct btrfs_raid_bio *rbio,
-                const struct bio *bio,
-                const struct raid56_bio_trace_info *trace_info),
-
-       TP_ARGS(rbio, bio, trace_info)
-);
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read,
-       TP_PROTO(const struct btrfs_raid_bio *rbio,
-                const struct bio *bio,
-                const struct raid56_bio_trace_info *trace_info),
-
-       TP_ARGS(rbio, bio, trace_info)
-);
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover,
+DEFINE_EVENT(btrfs_raid56_bio, raid56_write,
        TP_PROTO(const struct btrfs_raid_bio *rbio,
                 const struct bio *bio,
                 const struct raid56_bio_trace_info *trace_info),
index 71dbe8b..e18684b 100644 (file)
@@ -80,11 +80,11 @@ TRACE_EVENT(erofs_fill_inode,
                  __entry->blkaddr, __entry->ofs)
 );
 
-TRACE_EVENT(erofs_readpage,
+TRACE_EVENT(erofs_read_folio,
 
-       TP_PROTO(struct page *page, bool raw),
+       TP_PROTO(struct folio *folio, bool raw),
 
-       TP_ARGS(page, raw),
+       TP_ARGS(folio, raw),
 
        TP_STRUCT__entry(
                __field(dev_t,          dev     )
@@ -96,11 +96,11 @@ TRACE_EVENT(erofs_readpage,
        ),
 
        TP_fast_assign(
-               __entry->dev    = page->mapping->host->i_sb->s_dev;
-               __entry->nid    = EROFS_I(page->mapping->host)->nid;
-               __entry->dir    = S_ISDIR(page->mapping->host->i_mode);
-               __entry->index  = page->index;
-               __entry->uptodate = PageUptodate(page);
+               __entry->dev    = folio->mapping->host->i_sb->s_dev;
+               __entry->nid    = EROFS_I(folio->mapping->host)->nid;
+               __entry->dir    = S_ISDIR(folio->mapping->host->i_mode);
+               __entry->index  = folio->index;
+               __entry->uptodate = folio_test_uptodate(folio);
                __entry->raw = raw;
        ),
 
index 8f5ee38..5646ae1 100644 (file)
@@ -462,11 +462,9 @@ TRACE_EVENT(jbd2_shrink_scan_exit,
 TRACE_EVENT(jbd2_shrink_checkpoint_list,
 
        TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid,
-                unsigned long nr_freed, unsigned long nr_scanned,
-                tid_t next_tid),
+                unsigned long nr_freed, tid_t next_tid),
 
-       TP_ARGS(journal, first_tid, tid, last_tid, nr_freed,
-               nr_scanned, next_tid),
+       TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, next_tid),
 
        TP_STRUCT__entry(
                __field(dev_t, dev)
@@ -474,7 +472,6 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
                __field(tid_t, tid)
                __field(tid_t, last_tid)
                __field(unsigned long, nr_freed)
-               __field(unsigned long, nr_scanned)
                __field(tid_t, next_tid)
        ),
 
@@ -484,15 +481,14 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
                __entry->tid            = tid;
                __entry->last_tid       = last_tid;
                __entry->nr_freed       = nr_freed;
-               __entry->nr_scanned     = nr_scanned;
                __entry->next_tid       = next_tid;
        ),
 
        TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu "
-                 "scanned %lu next transaction %u",
+                 "next transaction %u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  __entry->first_tid, __entry->tid, __entry->last_tid,
-                 __entry->nr_freed, __entry->nr_scanned, __entry->next_tid)
+                 __entry->nr_freed, __entry->next_tid)
 );
 
 #endif /* _TRACE_JBD2_H */
index bf06db8..7b1ddff 100644 (file)
@@ -381,6 +381,7 @@ TRACE_EVENT(tcp_cong_state_set,
                __field(const void *, skaddr)
                __field(__u16, sport)
                __field(__u16, dport)
+               __field(__u16, family)
                __array(__u8, saddr, 4)
                __array(__u8, daddr, 4)
                __array(__u8, saddr_v6, 16)
@@ -396,6 +397,7 @@ TRACE_EVENT(tcp_cong_state_set,
 
                __entry->sport = ntohs(inet->inet_sport);
                __entry->dport = ntohs(inet->inet_dport);
+               __entry->family = sk->sk_family;
 
                p32 = (__be32 *) __entry->saddr;
                *p32 = inet->inet_saddr;
@@ -409,7 +411,8 @@ TRACE_EVENT(tcp_cong_state_set,
                __entry->cong_state = ca_state;
        ),
 
-       TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+       TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+                 show_family_name(__entry->family),
                  __entry->sport, __entry->dport,
                  __entry->saddr, __entry->daddr,
                  __entry->saddr_v6, __entry->daddr_v6,
index fd6c1cb..abe087c 100644 (file)
@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 #define __NR_cachestat 451
 __SYSCALL(__NR_cachestat, sys_cachestat)
 
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+
 #undef __NR_syscalls
-#define __NR_syscalls 452
+#define __NR_syscalls 453
 
 /*
  * 32 bit systems traditionally used different
index b80fcc9..f85743e 100644 (file)
@@ -51,13 +51,13 @@ enum blk_zone_type {
  *
  * The Zone Condition state machine in the ZBC/ZAC standards maps the above
 * definitions as:
- *   - ZC1: Empty         | BLK_ZONE_EMPTY
+ *   - ZC1: Empty         | BLK_ZONE_COND_EMPTY
  *   - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
  *   - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
- *   - ZC4: Closed        | BLK_ZONE_CLOSED
- *   - ZC5: Full          | BLK_ZONE_FULL
- *   - ZC6: Read Only     | BLK_ZONE_READONLY
- *   - ZC7: Offline       | BLK_ZONE_OFFLINE
+ *   - ZC4: Closed        | BLK_ZONE_COND_CLOSED
+ *   - ZC5: Full          | BLK_ZONE_COND_FULL
+ *   - ZC6: Read Only     | BLK_ZONE_COND_READONLY
+ *   - ZC7: Offline       | BLK_ZONE_COND_OFFLINE
  *
  * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
  * be considered invalid.
index ab38d0f..fc3c321 100644 (file)
 
 #define BTRFS_EXTENT_DATA_REF_KEY      178
 
-#define BTRFS_EXTENT_REF_V0_KEY                180
+/*
+ * Obsolete key. Definition removed in 6.6, value may be reused in the future.
+ *
+ * #define BTRFS_EXTENT_REF_V0_KEY     180
+ */
 
 #define BTRFS_SHARED_BLOCK_REF_KEY     182
 
index 0c8cf35..e0e1591 100644 (file)
@@ -443,7 +443,6 @@ typedef struct elf64_shdr {
 #define NT_MIPS_DSP    0x800           /* MIPS DSP ASE registers */
 #define NT_MIPS_FP_MODE        0x801           /* MIPS floating-point mode */
 #define NT_MIPS_MSA    0x802           /* MIPS SIMD registers */
-#define NT_RISCV_VECTOR        0x900           /* RISC-V vector registers */
 #define NT_LOONGARCH_CPUCFG    0xa00   /* LoongArch CPU config registers */
 #define NT_LOONGARCH_CSR       0xa01   /* LoongArch control and status registers */
 #define NT_LOONGARCH_LSX       0xa02   /* LoongArch Loongson SIMD Extension registers */
index 1b9d0df..b3fcab1 100644 (file)
  *  - add extension header
  *  - add FUSE_EXT_GROUPS
  *  - add FUSE_CREATE_SUPP_GROUP
+ *  - add FUSE_HAS_EXPIRE_ONLY
  */
 
 #ifndef _LINUX_FUSE_H
@@ -369,6 +370,7 @@ struct fuse_file_lock {
  * FUSE_HAS_INODE_DAX:  use per inode DAX
  * FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
  *                     symlink and mknod (single group that matches parent)
+ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
  */
 #define FUSE_ASYNC_READ                (1 << 0)
 #define FUSE_POSIX_LOCKS       (1 << 1)
@@ -406,6 +408,7 @@ struct fuse_file_lock {
 #define FUSE_SECURITY_CTX      (1ULL << 32)
 #define FUSE_HAS_INODE_DAX     (1ULL << 33)
 #define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
+#define FUSE_HAS_EXPIRE_ONLY   (1ULL << 35)
 
 /**
  * CUSE INIT request/reply flags
index 9efc423..4d0ad22 100644 (file)
@@ -18,7 +18,11 @@ struct sockaddr_ll {
        unsigned short  sll_hatype;
        unsigned char   sll_pkttype;
        unsigned char   sll_halen;
-       unsigned char   sll_addr[8];
+       union {
+               unsigned char   sll_addr[8];
+               /* Actual length is in sll_halen. */
+               __DECLARE_FLEX_ARRAY(unsigned char, sll_addr_flex);
+       };
 };
 
 /* Packet types */
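
The union leaves the ABI layout untouched while giving hardened memcpy() a flexible-array view of the address bytes, whose valid length is sll_halen. Assumed usage:

    /* copy only the bytes that are valid for this link type */
    memcpy(mac, sll->sll_addr_flex, sll->sll_halen);
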
index 8eb0d7b..bb242fd 100644 (file)
@@ -100,8 +100,9 @@ enum fsconfig_command {
        FSCONFIG_SET_PATH       = 3,    /* Set parameter, supplying an object by path */
        FSCONFIG_SET_PATH_EMPTY = 4,    /* Set parameter, supplying an object by (empty) path */
        FSCONFIG_SET_FD         = 5,    /* Set parameter, supplying an object by fd */
-       FSCONFIG_CMD_CREATE     = 6,    /* Invoke superblock creation */
+       FSCONFIG_CMD_CREATE     = 6,    /* Create new or reuse existing superblock */
        FSCONFIG_CMD_RECONFIGURE = 7,   /* Invoke superblock reconfiguration */
+       FSCONFIG_CMD_CREATE_EXCL = 8,   /* Create new superblock, fail if reusing existing superblock */
 };
 
 /*
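
FSCONFIG_CMD_CREATE_EXCL makes superblock reuse an error rather than a silent fallback. A userspace sketch, assuming raw syscall wrappers for fsopen(2)/fsconfig(2) and EBUSY as the reuse error:

    int fsfd = fsopen("ext4", FSOPEN_CLOEXEC);

    fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "/dev/vda1", 0);
    if (fsconfig(fsfd, FSCONFIG_CMD_CREATE_EXCL, NULL, NULL, 0) < 0)
            err(1, "superblock already exists or creation failed");
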
index 3767543..39c6a25 100644 (file)
@@ -1339,7 +1339,8 @@ union perf_mem_data_src {
 #define PERF_MEM_LVLNUM_L2     0x02 /* L2 */
 #define PERF_MEM_LVLNUM_L3     0x03 /* L3 */
 #define PERF_MEM_LVLNUM_L4     0x04 /* L4 */
-/* 5-0x8 available */
+/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_UNC    0x08 /* Uncached */
 #define PERF_MEM_LVLNUM_CXL    0x09 /* CXL */
 #define PERF_MEM_LVLNUM_IO     0x0a /* I/O */
 #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
index 7865f5a..4f3932b 100644 (file)
@@ -710,9 +710,11 @@ enum {
        TCA_FLOWER_KEY_CFM_OPT_UNSPEC,
        TCA_FLOWER_KEY_CFM_MD_LEVEL,
        TCA_FLOWER_KEY_CFM_OPCODE,
-       TCA_FLOWER_KEY_CFM_OPT_MAX,
+       __TCA_FLOWER_KEY_CFM_OPT_MAX,
 };
 
+#define TCA_FLOWER_KEY_CFM_OPT_MAX (__TCA_FLOWER_KEY_CFM_OPT_MAX - 1)
+
 #define TCA_FLOWER_MASK_FLAGS_RANGE    (1 << 0) /* Range-based match */
 
 /* Match-all classifier */
index f17c963..5209010 100644 (file)
@@ -77,6 +77,7 @@
 #define        QFMT_VFS_V0 2
 #define QFMT_OCFS2 3
 #define        QFMT_VFS_V1 4
+#define        QFMT_SHMEM 5
 
 /* Size of block in which space limits are passed through the quota
  * interface */
index 0fdc6ef..dbfc9b3 100644 (file)
@@ -115,6 +115,8 @@ struct seccomp_notif_resp {
        __u32 flags;
 };
 
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+
 /* valid flags for seccomp_notif_addfd */
 #define SECCOMP_ADDFD_FLAG_SETFD       (1UL << 0) /* Specify remote fd */
 #define SECCOMP_ADDFD_FLAG_SEND                (1UL << 1) /* Addfd and return it, atomically */
@@ -150,4 +152,6 @@ struct seccomp_notif_addfd {
 #define SECCOMP_IOCTL_NOTIF_ADDFD      SECCOMP_IOW(3, \
                                                struct seccomp_notif_addfd)
 
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+
 #endif /* _UAPI_LINUX_SECCOMP_H */
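
SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP asks the kernel to wake the supervisor synchronously on the notifying task's CPU, and the new ioctl sets such per-listener flags. A sketch (passing the flag word directly as the ioctl argument is this example's assumption):

    __u64 flags = SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP;

    if (ioctl(listener_fd, SECCOMP_IOCTL_NOTIF_SET_FLAGS, flags) < 0)
            perror("SECCOMP_IOCTL_NOTIF_SET_FLAGS");
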
index 7837ba4..7c3fc39 100644 (file)
@@ -45,3 +45,7 @@
                TYPE NAME[]; \
        }
 #endif
+
+#ifndef __counted_by
+#define __counted_by(m)
+#endif
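
The empty fallback lets headers annotate flexible arrays with __counted_by() without breaking compilers that lack the attribute; where supported, it tells fortified bounds checks which member holds the element count. Illustrative use (the struct is invented):

    struct example_buf {
            __u32   nr;
            __u32   data[] __counted_by(nr);        /* bounds hint: 'nr' elements */
    };
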
index 2801b65..fd3f9e5 100644 (file)
@@ -71,6 +71,31 @@ struct utp_upiu_query {
 };
 
 /**
+ * struct utp_upiu_query_v4_0 - upiu request buffer structure for
+ * query request >= UFS 4.0 spec.
+ * @opcode: command to perform B-0
+ * @idn: a value that indicates the particular type of data B-1
+ * @index: Index to further identify data B-2
+ * @selector: Index to further identify data B-3
+ * @osf3: spec field B-4
+ * @osf4: spec field B-5
+ * @osf5: spec field B 6,7
+ * @osf6: spec field DW 8,9
+ * @osf7: spec field DW 10,11
+ */
+struct utp_upiu_query_v4_0 {
+       __u8 opcode;
+       __u8 idn;
+       __u8 index;
+       __u8 selector;
+       __u8 osf3;
+       __u8 osf4;
+       __be16 osf5;
+       __be32 osf6;
+       __be32 osf7;
+       __be32 reserved;
+};
+
+/**
  * struct utp_upiu_cmd - Command UPIU structure
  * @data_transfer_len: Data Transfer Length DW-3
  * @cdb: Command Descriptor Block CDB DW-4 to DW-7
index 7fbf732..aef2b75 100644 (file)
@@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid {
        domid_t domid;
 };
 
+/*
+ * Bind statically allocated @port.
+ */
+#define IOCTL_EVTCHN_BIND_STATIC                       \
+       _IOC(_IOC_NONE, 'E', 7, sizeof(struct ioctl_evtchn_bind))
+struct ioctl_evtchn_bind {
+       unsigned int port;
+};
+
 #endif /* __LINUX_PUBLIC_EVTCHN_H__ */
index d202955..375718b 100644 (file)
@@ -98,6 +98,18 @@ struct privcmd_mmap_resource {
        __u64 addr;
 };
 
+/* For privcmd_irqfd::flags */
+#define PRIVCMD_IRQFD_FLAG_DEASSIGN (1 << 0)
+
+struct privcmd_irqfd {
+       void __user *dm_op;
+       __u32 size; /* Size of structure pointed by dm_op */
+       __u32 fd;
+       __u32 flags;
+       domid_t dom;
+       __u8 pad[2];
+};
+
 /*
  * @cmd: IOCTL_PRIVCMD_HYPERCALL
  * @arg: &privcmd_hypercall_t
@@ -125,5 +137,7 @@ struct privcmd_mmap_resource {
        _IOC(_IOC_NONE, 'P', 6, sizeof(domid_t))
 #define IOCTL_PRIVCMD_MMAP_RESOURCE                            \
        _IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource))
+#define IOCTL_PRIVCMD_IRQFD                                    \
+       _IOC(_IOC_NONE, 'P', 8, sizeof(struct privcmd_irqfd))
 
 #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
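
IOCTL_PRIVCMD_IRQFD wires an eventfd to a device-model operation so the operation fires when the eventfd is signalled. Assumed invocation from a privcmd fd (field values illustrative):

    struct privcmd_irqfd irqfd = {
            .dm_op  = dm_op_buf,    /* buffer described by 'size' */
            .size   = dm_op_size,
            .fd     = event_fd,
            .dom    = domid,
    };

    if (ioctl(privcmd_fd, IOCTL_PRIVCMD_IRQFD, &irqfd) < 0)
            perror("IOCTL_PRIVCMD_IRQFD");
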
index 4e8d624..198cb39 100644 (file)
@@ -170,6 +170,7 @@ enum attr_idn {
        QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST    = 0x1E,
        QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE        = 0x1F,
        QUERY_ATTR_IDN_EXT_IID_EN               = 0x2A,
+       QUERY_ATTR_IDN_TIMESTAMP                = 0x30
 };
 
 /* Descriptor idn for Query requests */
index b958c2e..418eef6 100644 (file)
@@ -38,18 +38,6 @@ struct kyrofb_info {
        int wc_cookie;
 };
 
-extern int kyro_dev_init(void);
-extern void kyro_dev_reset(void);
-
-extern unsigned char *kyro_dev_physical_fb_ptr(void);
-extern unsigned char *kyro_dev_virtual_fb_ptr(void);
-extern void *kyro_dev_physical_regs_ptr(void);
-extern void *kyro_dev_virtual_regs_ptr(void);
-extern unsigned int kyro_dev_fb_size(void);
-extern unsigned int kyro_dev_regs_size(void);
-
-extern u32 kyro_dev_overlay_offset(void);
-
 /*
  * benedict.gaster@superh.com
  * Added the follow IOCTLS for the creation of overlay services...
index ac1281c..95d5e28 100644 (file)
@@ -69,13 +69,12 @@ int xen_set_irq_priority(unsigned irq, unsigned priority);
 /*
  * Allow extra references to event channels exposed to userspace by evtchn
  */
-int evtchn_make_refcounted(evtchn_port_t evtchn);
+int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static);
 int evtchn_get(evtchn_port_t evtchn);
 void evtchn_put(evtchn_port_t evtchn);
 
 void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector);
 void rebind_evtchn_irq(evtchn_port_t evtchn, int irq);
-int xen_set_affinity_evtchn(struct irq_desc *desc, unsigned int tcpu);
 
 static inline void notify_remote_via_evtchn(evtchn_port_t port)
 {
@@ -141,4 +140,13 @@ void xen_init_IRQ(void);
 
 irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
 
+static inline void xen_evtchn_close(evtchn_port_t port)
+{
+       struct evtchn_close close;
+
+       close.port = port;
+       if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+               BUG();
+}
+
 #endif /* _XEN_EVENTS_H */
index f7f65af..5e7d488 100644 (file)
@@ -629,6 +629,7 @@ config TASK_IO_ACCOUNTING
 
 config PSI
        bool "Pressure stall information tracking"
+       select KERNFS
        help
          Collect metrics that indicate how overcommitted the CPU, memory,
          and IO capacity are in the system.
index 1aa0158..5dfd30b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/ramfs.h>
 #include <linux/shmem_fs.h>
+#include <linux/ktime.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
@@ -71,12 +72,37 @@ static int __init rootwait_setup(char *str)
 {
        if (*str)
                return 0;
-       root_wait = 1;
+       root_wait = -1;
        return 1;
 }
 
 __setup("rootwait", rootwait_setup);
 
+static int __init rootwait_timeout_setup(char *str)
+{
+       int sec;
+
+       if (kstrtoint(str, 0, &sec) || sec < 0) {
+               pr_warn("ignoring invalid rootwait value\n");
+               goto ignore;
+       }
+
+       if (check_mul_overflow(sec, MSEC_PER_SEC, &root_wait)) {
+               pr_warn("ignoring excessive rootwait value\n");
+               goto ignore;
+       }
+
+       return 1;
+
+ignore:
+       /* Fall back to an indefinite wait */
+       root_wait = -1;
+
+       return 1;
+}
+
+__setup("rootwait=", rootwait_timeout_setup);
+
 static char * __initdata root_mount_data;
 static int __init root_data_setup(char *str)
 {
@@ -384,14 +410,22 @@ void __init mount_root(char *root_device_name)
 /* wait for any asynchronous scanning to complete */
 static void __init wait_for_root(char *root_device_name)
 {
+       ktime_t end;
+
        if (ROOT_DEV != 0)
                return;
 
        pr_info("Waiting for root device %s...\n", root_device_name);
 
+       end = ktime_add_ms(ktime_get_raw(), root_wait);
+
        while (!driver_probe_done() ||
-              early_lookup_bdev(root_device_name, &ROOT_DEV) < 0)
+              early_lookup_bdev(root_device_name, &ROOT_DEV) < 0) {
                msleep(5);
+               if (root_wait > 0 && ktime_after(ktime_get_raw(), end))
+                       break;
+       }
+
        async_synchronize_full();
 
 }
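
Stripped of the kernel plumbing, the new rootwait= handling is an
overflow-checked seconds-to-milliseconds conversion with a negative sentinel
meaning "wait forever"; a standalone sketch using the compiler builtin that
check_mul_overflow() wraps (names illustrative):

#include <limits.h>
#include <stdio.h>

#define MSEC_PER_SEC 1000

static int parse_rootwait_ms(int sec)
{
        int ms;

        /* invalid or overflowing values fall back to an indefinite wait */
        if (sec < 0 || __builtin_mul_overflow(sec, MSEC_PER_SEC, &ms))
                return -1;
        return ms;
}

int main(void)
{
        printf("%d\n", parse_rootwait_ms(30));      /* 30000 */
        printf("%d\n", parse_rootwait_ms(INT_MAX)); /* -1: overflow */
        return 0;
}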
index e8096d5..93db3e4 100644 (file)
@@ -1948,6 +1948,14 @@ fail:
                ret = io_issue_sqe(req, issue_flags);
                if (ret != -EAGAIN)
                        break;
+
+               /*
+                * If REQ_F_NOWAIT is set, then don't wait or retry with
+                * poll. -EAGAIN is final for that case.
+                */
+               if (req->flags & REQ_F_NOWAIT)
+                       break;
+
                /*
                 * We can get EAGAIN for iopolled IO even though we're
                 * forcing a sync submission from here, since we can't
@@ -2485,10 +2493,21 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
        return 0;
 }
 
+static bool current_pending_io(void)
+{
+       struct io_uring_task *tctx = current->io_uring;
+
+       if (!tctx)
+               return false;
+       return percpu_counter_read_positive(&tctx->inflight);
+}
+
 /* when returns >0, the caller should retry */
 static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                                          struct io_wait_queue *iowq)
 {
+       int io_wait, ret;
+
        if (unlikely(READ_ONCE(ctx->check_cq)))
                return 1;
        if (unlikely(!llist_empty(&ctx->work_llist)))
@@ -2499,11 +2518,22 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
                return -EINTR;
        if (unlikely(io_should_wake(iowq)))
                return 0;
+
+       /*
+        * Mark us as being in io_wait if we have pending requests, so cpufreq
+        * can take into account that the task is waiting for IO; that turns
+        * out to be important for low queue-depth (QD) IO.
+        */
+       io_wait = current->in_iowait;
+       if (current_pending_io())
+               current->in_iowait = 1;
+       ret = 0;
        if (iowq->timeout == KTIME_MAX)
                schedule();
        else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
-               return -ETIME;
-       return 0;
+               ret = -ETIME;
+       current->in_iowait = io_wait;
+       return ret;
 }
 
 /*
@@ -3418,8 +3448,6 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
                        unsigned long addr, unsigned long len,
                        unsigned long pgoff, unsigned long flags)
 {
-       const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
-       struct vm_unmapped_area_info info;
        void *ptr;
 
        /*
@@ -3434,32 +3462,29 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
        if (IS_ERR(ptr))
                return -ENOMEM;
 
-       info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-       info.length = len;
-       info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-       info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+       /*
+        * Some architectures have strong cache aliasing requirements.
+        * For such architectures we need a coherent mapping which aliases
+        * kernel memory *and* userspace memory. To achieve that:
+        * - use a NULL file pointer to reference physical memory, and
+        * - use the kernel virtual address of the shared io_uring context
+        *   (instead of the userspace-provided address, which has to be 0UL
+        *   anyway).
+        * - use the same pgoff that get_unmapped_area() uses to
+        *   calculate the page colouring.
+        * For architectures without such aliasing requirements, the
+        * architecture will return any suitable mapping because addr is 0.
+        */
+       filp = NULL;
+       flags |= MAP_SHARED;
+       pgoff = 0;      /* has been translated to ptr above */
 #ifdef SHM_COLOUR
-       info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
+       addr = (uintptr_t) ptr;
+       pgoff = addr >> PAGE_SHIFT;
 #else
-       info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
+       addr = 0UL;
 #endif
-       info.align_offset = (unsigned long) ptr;
-
-       /*
-        * A failed mmap() very likely causes application failure,
-        * so fall back to the bottom-up function here. This scenario
-        * can happen with large stack limits and large mmap()
-        * allocations.
-        */
-       addr = vm_unmapped_area(&info);
-       if (offset_in_page(addr)) {
-               info.flags = 0;
-               info.low_limit = TASK_UNMAPPED_BASE;
-               info.high_limit = mmap_end;
-               addr = vm_unmapped_area(&info);
-       }
-
-       return addr;
+       return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
 }
 
 #else /* !CONFIG_MMU */
@@ -3859,7 +3884,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
                ctx->syscall_iopoll = 1;
 
        ctx->compat = in_compat_syscall();
-       if (!capable(CAP_IPC_LOCK))
+       if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
                ctx->user = get_uid(current_user());
 
        /*
index 10ca57f..e3fae26 100644 (file)
@@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open)
 {
        /*
         * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
-        * it'll always -EAGAIN
+        * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
+        * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
+        * async for.
         */
-       return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE);
+       return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
 }
 
 static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
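
The one-character fix above matters because of how the flag is composed:
O_TMPFILE is (__O_TMPFILE | O_DIRECTORY), so masking open flags against
O_TMPFILE also fires for plain O_DIRECTORY opens. A small demonstration
(glibc on Linux assumed):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
        int flags = O_RDONLY | O_DIRECTORY;     /* an ordinary directory open */

        printf("matches O_TMPFILE mask:   %d\n", (flags & O_TMPFILE) != 0);
        printf("matches __O_TMPFILE mask: %d\n", (flags & __O_TMPFILE) != 0);
        return 0;       /* prints 1 then 0: only the __O_TMPFILE test is right */
}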
index 1bce220..b343503 100644 (file)
@@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        } else {
                rw->kiocb.ki_ioprio = get_current_ioprio();
        }
+       rw->kiocb.dio_complete = NULL;
 
        rw->addr = READ_ONCE(sqe->addr);
        rw->len = READ_ONCE(sqe->len);
@@ -220,17 +221,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
 }
 #endif
 
-static void kiocb_end_write(struct io_kiocb *req)
+static void io_req_end_write(struct io_kiocb *req)
 {
-       /*
-        * Tell lockdep we inherited freeze protection from submission
-        * thread.
-        */
        if (req->flags & REQ_F_ISREG) {
-               struct super_block *sb = file_inode(req->file)->i_sb;
+               struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
-               __sb_writers_acquired(sb, SB_FREEZE_WRITE);
-               sb_end_write(sb);
+               kiocb_end_write(&rw->kiocb);
        }
 }
 
@@ -243,7 +239,7 @@ static void io_req_io_end(struct io_kiocb *req)
        struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
 
        if (rw->kiocb.ki_flags & IOCB_WRITE) {
-               kiocb_end_write(req);
+               io_req_end_write(req);
                fsnotify_modify(req->file);
        } else {
                fsnotify_access(req->file);
@@ -285,6 +281,15 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
 
 void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
 {
+       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+       struct kiocb *kiocb = &rw->kiocb;
+
+       if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
+               long res = kiocb->dio_complete(rw->kiocb.private);
+
+               io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+       }
+
        io_req_io_end(req);
 
        if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
@@ -300,9 +305,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
        struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
        struct io_kiocb *req = cmd_to_io_kiocb(rw);
 
-       if (__io_complete_rw_common(req, res))
-               return;
-       io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+       if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
+               if (__io_complete_rw_common(req, res))
+                       return;
+               io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+       }
        req->io_task_work.func = io_req_rw_complete;
        __io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
 }
@@ -313,7 +320,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
        struct io_kiocb *req = cmd_to_io_kiocb(rw);
 
        if (kiocb->ki_flags & IOCB_WRITE)
-               kiocb_end_write(req);
+               io_req_end_write(req);
        if (unlikely(res != req->cqe.res)) {
                if (res == -EAGAIN && io_rw_should_reissue(req)) {
                        req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
@@ -902,19 +909,18 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
                return ret;
        }
 
+       if (req->flags & REQ_F_ISREG)
+               kiocb_start_write(kiocb);
+       kiocb->ki_flags |= IOCB_WRITE;
+
        /*
-        * Open-code file_start_write here to grab freeze protection,
-        * which will be released by another thread in
-        * io_complete_rw().  Fool lockdep by telling it the lock got
-        * released so that it doesn't complain about the held lock when
-        * we return to userspace.
+        * For non-polled IO, set IOCB_DIO_CALLER_COMP, stating that our handler
+        * groks deferring the completion to task context. This isn't
+        * necessary or useful for polled IO, as that can always complete
+        * directly.
         */
-       if (req->flags & REQ_F_ISREG) {
-               sb_start_write(file_inode(req->file)->i_sb);
-               __sb_writers_release(file_inode(req->file)->i_sb,
-                                       SB_FREEZE_WRITE);
-       }
-       kiocb->ki_flags |= IOCB_WRITE;
+       if (!(kiocb->ki_flags & IOCB_HIPRI))
+               kiocb->ki_flags |= IOCB_DIO_CALLER_COMP;
 
        if (likely(req->file->f_op->write_iter))
                ret2 = call_write_iter(req->file, kiocb, &s->iter);
@@ -961,7 +967,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
                                io->bytes_done += ret2;
 
                        if (kiocb->ki_flags & IOCB_WRITE)
-                               kiocb_end_write(req);
+                               io_req_end_write(req);
                        return ret ? ret : -EAGAIN;
                }
 done:
@@ -972,7 +978,7 @@ copy_iov:
                ret = io_setup_async_rw(req, iovec, s, false);
                if (!ret) {
                        if (kiocb->ki_flags & IOCB_WRITE)
-                               kiocb_end_write(req);
+                               io_req_end_write(req);
                        return -EAGAIN;
                }
                return ret;
index 71881bd..ba8215e 100644 (file)
@@ -302,7 +302,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
        inode->i_mode = mode;
        inode->i_uid = current_fsuid();
        inode->i_gid = current_fsgid();
-       inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode);
+       inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
 
        if (S_ISREG(mode)) {
                struct mqueue_inode_info *info;
@@ -596,7 +596,7 @@ static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
 
        put_ipc_ns(ipc_ns);
        dir->i_size += DIRENT_SIZE;
-       dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+       dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
 
        d_instantiate(dentry, inode);
        dget(dentry);
@@ -618,7 +618,7 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 {
        struct inode *inode = d_inode(dentry);
 
-       dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+       dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
        dir->i_size -= DIRENT_SIZE;
        drop_nlink(inode);
        dput(dentry);
@@ -635,7 +635,8 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
 static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
                                size_t count, loff_t *off)
 {
-       struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
+       struct inode *inode = file_inode(filp);
+       struct mqueue_inode_info *info = MQUEUE_I(inode);
        char buffer[FILENT_SIZE];
        ssize_t ret;
 
@@ -656,7 +657,7 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
        if (ret <= 0)
                return ret;
 
-       file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp));
+       inode->i_atime = inode_set_ctime_current(inode);
        return ret;
 }
 
@@ -1162,8 +1163,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
                                goto out_unlock;
                        __do_notify(info);
                }
-               inode->i_atime = inode->i_mtime = inode->i_ctime =
-                               current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        }
 out_unlock:
        spin_unlock(&info->lock);
@@ -1257,8 +1257,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
 
                msg_ptr = msg_get(info);
 
-               inode->i_atime = inode->i_mtime = inode->i_ctime =
-                               current_time(inode);
+               inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
 
                /* There is now free space in queue. */
                pipelined_receive(&wake_q, info);
@@ -1396,7 +1395,7 @@ retry:
        if (notification == NULL) {
                if (info->notify_owner == task_tgid(current)) {
                        remove_notification(info);
-                       inode->i_atime = inode->i_ctime = current_time(inode);
+                       inode->i_atime = inode_set_ctime_current(inode);
                }
        } else if (info->notify_owner != NULL) {
                ret = -EBUSY;
@@ -1422,7 +1421,7 @@ retry:
 
                info->notify_owner = get_pid(task_tgid(current));
                info->notify_user_ns = get_user_ns(current_user_ns());
-               inode->i_atime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode_set_ctime_current(inode);
        }
        spin_unlock(&info->lock);
 out_fput:
@@ -1485,7 +1484,7 @@ static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
                        f.file->f_flags &= ~O_NONBLOCK;
                spin_unlock(&f.file->f_lock);
 
-               inode->i_atime = inode->i_ctime = current_time(inode);
+               inode->i_atime = inode_set_ctime_current(inode);
        }
 
        spin_unlock(&info->lock);
index 8a33e87..286ab3d 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/sched.h>
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
+#include <linux/completion.h>
 #include <trace/events/xdp.h>
 #include <linux/btf_ids.h>
 
@@ -73,6 +74,7 @@ struct bpf_cpu_map_entry {
        struct rcu_head rcu;
 
        struct work_struct kthread_stop_wq;
+       struct completion kthread_running;
 };
 
 struct bpf_cpu_map {
@@ -122,22 +124,6 @@ static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
        atomic_inc(&rcpu->refcnt);
 }
 
-/* called from workqueue, to workaround syscall using preempt_disable */
-static void cpu_map_kthread_stop(struct work_struct *work)
-{
-       struct bpf_cpu_map_entry *rcpu;
-
-       rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
-
-       /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
-        * as it waits until all in-flight call_rcu() callbacks complete.
-        */
-       rcu_barrier();
-
-       /* kthread_stop will wake_up_process and wait for it to complete */
-       kthread_stop(rcpu->kthread);
-}
-
 static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
 {
        /* The tear-down procedure should have made sure that queue is
@@ -145,11 +131,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
         * invoked cpu_map_kthread_stop(). Catch any broken behaviour
         * gracefully and warn once.
         */
-       struct xdp_frame *xdpf;
+       void *ptr;
 
-       while ((xdpf = ptr_ring_consume(ring)))
-               if (WARN_ON_ONCE(xdpf))
-                       xdp_return_frame(xdpf);
+       while ((ptr = ptr_ring_consume(ring))) {
+               WARN_ON_ONCE(1);
+               if (unlikely(__ptr_test_bit(0, &ptr))) {
+                       __ptr_clear_bit(0, &ptr);
+                       kfree_skb(ptr);
+                       continue;
+               }
+               xdp_return_frame(ptr);
+       }
 }
 
 static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -165,6 +157,22 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
        }
 }
 
+/* called from workqueue, to workaround syscall using preempt_disable */
+static void cpu_map_kthread_stop(struct work_struct *work)
+{
+       struct bpf_cpu_map_entry *rcpu;
+
+       rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+
+       /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
+        * as it waits until all in-flight call_rcu() callbacks complete.
+        */
+       rcu_barrier();
+
+       /* kthread_stop will wake_up_process and wait for it to complete */
+       kthread_stop(rcpu->kthread);
+}
+
 static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
                                     struct list_head *listp,
                                     struct xdp_cpumap_stats *stats)
@@ -290,11 +298,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
        return nframes;
 }
 
-
 static int cpu_map_kthread_run(void *data)
 {
        struct bpf_cpu_map_entry *rcpu = data;
 
+       complete(&rcpu->kthread_running);
        set_current_state(TASK_INTERRUPTIBLE);
 
        /* When kthread gives stop order, then rcpu have been disconnected
@@ -459,6 +467,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
                goto free_ptr_ring;
 
        /* Setup kthread */
+       init_completion(&rcpu->kthread_running);
        rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
                                               "cpumap/%d/map:%d", cpu,
                                               map->id);
@@ -472,6 +481,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
        kthread_bind(rcpu->kthread, cpu);
        wake_up_process(rcpu->kthread);
 
+       /* Make sure the kthread is already running, so kthread_stop() will
+        * not stop it prematurely and all pending frames or skbs will be
+        * handled by the kthread before kthread_stop() returns.
+        */
+       wait_for_completion(&rcpu->kthread_running);
+
        return rcpu;
 
 free_prog:
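
The completion closes a startup race: without it, a quick map update could
call kthread_stop() before the kthread ever ran, leaking queued frames. A
userspace analogue of the same handshake with POSIX primitives (compile with
-pthread; names illustrative):

#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>

static sem_t running;

static void *worker(void *arg)
{
        sem_post(&running);             /* complete(&rcpu->kthread_running) */
        puts("worker entered its loop");
        return NULL;
}

int main(void)
{
        pthread_t t;

        sem_init(&running, 0, 0);       /* init_completion() */
        pthread_create(&t, NULL, worker, NULL);
        sem_wait(&running);             /* wait_for_completion() */
        pthread_join(t, NULL);          /* safe stand-in for kthread_stop() */
        return 0;
}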
index 4174f76..99d0625 100644 (file)
@@ -118,9 +118,8 @@ static struct inode *bpf_get_inode(struct super_block *sb,
                return ERR_PTR(-ENOSPC);
 
        inode->i_ino = get_next_ino();
-       inode->i_atime = current_time(inode);
+       inode->i_atime = inode_set_ctime_current(inode);
        inode->i_mtime = inode->i_atime;
-       inode->i_ctime = inode->i_atime;
 
        inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
 
@@ -148,8 +147,7 @@ static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
        d_instantiate(dentry, inode);
        dget(dentry);
 
-       dir->i_mtime = current_time(dir);
-       dir->i_ctime = dir->i_mtime;
+       dir->i_mtime = inode_set_ctime_current(dir);
 }
 
 static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
index 11e54dd..02a021c 100644 (file)
@@ -5573,16 +5573,17 @@ static int update_stack_depth(struct bpf_verifier_env *env,
  * Since recursion is prevented by check_cfg() this algorithm
  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
  */
-static int check_max_stack_depth(struct bpf_verifier_env *env)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 {
-       int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
        struct bpf_subprog_info *subprog = env->subprog_info;
        struct bpf_insn *insn = env->prog->insnsi;
+       int depth = 0, frame = 0, i, subprog_end;
        bool tail_call_reachable = false;
        int ret_insn[MAX_CALL_FRAMES];
        int ret_prog[MAX_CALL_FRAMES];
        int j;
 
+       i = subprog[idx].start;
 process_func:
        /* protect against potential stack overflow that might happen when
         * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
@@ -5621,7 +5622,7 @@ process_func:
 continue_func:
        subprog_end = subprog[idx + 1].start;
        for (; i < subprog_end; i++) {
-               int next_insn;
+               int next_insn, sidx;
 
                if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
                        continue;
@@ -5631,21 +5632,23 @@ continue_func:
 
                /* find the callee */
                next_insn = i + insn[i].imm + 1;
-               idx = find_subprog(env, next_insn);
-               if (idx < 0) {
+               sidx = find_subprog(env, next_insn);
+               if (sidx < 0) {
                        WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
                                  next_insn);
                        return -EFAULT;
                }
-               if (subprog[idx].is_async_cb) {
-                       if (subprog[idx].has_tail_call) {
+               if (subprog[sidx].is_async_cb) {
+                       if (subprog[sidx].has_tail_call) {
                                verbose(env, "verifier bug. subprog has tail_call and async cb\n");
                                return -EFAULT;
                        }
-                        /* async callbacks don't increase bpf prog stack size */
-                       continue;
+                       /* async callbacks don't increase bpf prog stack size unless called directly */
+                       if (!bpf_pseudo_call(insn + i))
+                               continue;
                }
                i = next_insn;
+               idx = sidx;
 
                if (subprog[idx].has_tail_call)
                        tail_call_reachable = true;
@@ -5681,6 +5684,22 @@ continue_func:
        goto continue_func;
 }
 
+static int check_max_stack_depth(struct bpf_verifier_env *env)
+{
+       struct bpf_subprog_info *si = env->subprog_info;
+       int ret;
+
+       for (int i = 0; i < env->subprog_cnt; i++) {
+               if (!i || si[i].is_async_cb) {
+                       ret = check_max_stack_depth_subprog(env, i);
+                       if (ret < 0)
+                               return ret;
+               }
+               continue;
+       }
+       return 0;
+}
+
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 static int get_callee_stack_depth(struct bpf_verifier_env *env,
                                  const struct bpf_insn *insn, int idx)
index bfe3cd8..5fa95f8 100644 (file)
@@ -3685,6 +3685,36 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
        return ret;
 }
 
+static int __maybe_unused cgroup_local_stat_show(struct seq_file *seq,
+                                                struct cgroup *cgrp, int ssid)
+{
+       struct cgroup_subsys *ss = cgroup_subsys[ssid];
+       struct cgroup_subsys_state *css;
+       int ret;
+
+       if (!ss->css_local_stat_show)
+               return 0;
+
+       css = cgroup_tryget_css(cgrp, ss);
+       if (!css)
+               return 0;
+
+       ret = ss->css_local_stat_show(seq, css);
+       css_put(css);
+       return ret;
+}
+
+static int cpu_local_stat_show(struct seq_file *seq, void *v)
+{
+       struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
+       int ret = 0;
+
+#ifdef CONFIG_CGROUP_SCHED
+       ret = cgroup_local_stat_show(seq, cgrp, cpu_cgrp_id);
+#endif
+       return ret;
+}
+
 #ifdef CONFIG_PSI
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
@@ -3730,7 +3760,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
        }
 
        psi = cgroup_psi(cgrp);
-       new = psi_trigger_create(psi, buf, res, of->file);
+       new = psi_trigger_create(psi, buf, res, of->file, of);
        if (IS_ERR(new)) {
                cgroup_put(cgrp);
                return PTR_ERR(new);
@@ -5235,6 +5265,10 @@ static struct cftype cgroup_base_files[] = {
                .name = "cpu.stat",
                .seq_show = cpu_stat_show,
        },
+       {
+               .name = "cpu.stat.local",
+               .seq_show = cpu_local_stat_show,
+       },
        { }     /* terminate */
 };
 
index 88a7ede..f6811c8 100644 (file)
@@ -592,7 +592,10 @@ static void lockdep_release_cpus_lock(void)
 void __weak arch_smt_update(void) { }
 
 #ifdef CONFIG_HOTPLUG_SMT
+
 enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED;
+static unsigned int cpu_smt_max_threads __ro_after_init;
+unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX;
 
 void __init cpu_smt_disable(bool force)
 {
@@ -606,16 +609,33 @@ void __init cpu_smt_disable(bool force)
                pr_info("SMT: disabled\n");
                cpu_smt_control = CPU_SMT_DISABLED;
        }
+       cpu_smt_num_threads = 1;
 }
 
 /*
  * The decision whether SMT is supported can only be done after the full
  * CPU identification. Called from architecture code.
  */
-void __init cpu_smt_check_topology(void)
+void __init cpu_smt_set_num_threads(unsigned int num_threads,
+                                   unsigned int max_threads)
 {
-       if (!topology_smt_supported())
+       WARN_ON(!num_threads || (num_threads > max_threads));
+
+       if (max_threads == 1)
                cpu_smt_control = CPU_SMT_NOT_SUPPORTED;
+
+       cpu_smt_max_threads = max_threads;
+
+       /*
+        * If SMT has been disabled via the kernel command line or SMT is
+        * not supported, set cpu_smt_num_threads to 1 for consistency.
+        * If enabled, take the architecture-requested number of threads
+        * to bring up into account.
+        */
+       if (cpu_smt_control != CPU_SMT_ENABLED)
+               cpu_smt_num_threads = 1;
+       else if (num_threads < cpu_smt_num_threads)
+               cpu_smt_num_threads = num_threads;
 }
 
 static int __init smt_cmdline_disable(char *str)
@@ -625,9 +645,23 @@ static int __init smt_cmdline_disable(char *str)
 }
 early_param("nosmt", smt_cmdline_disable);
 
+/*
+ * For architectures supporting partial SMT states, check if the thread is allowed.
+ * Otherwise this has already been checked through cpu_smt_max_threads when
+ * setting the SMT level.
+ */
+static inline bool cpu_smt_thread_allowed(unsigned int cpu)
+{
+#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
+       return topology_smt_thread_allowed(cpu);
+#else
+       return true;
+#endif
+}
+
 static inline bool cpu_smt_allowed(unsigned int cpu)
 {
-       if (cpu_smt_control == CPU_SMT_ENABLED)
+       if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
                return true;
 
        if (topology_is_primary_thread(cpu))
@@ -642,7 +676,7 @@ static inline bool cpu_smt_allowed(unsigned int cpu)
        return !cpumask_test_cpu(cpu, &cpus_booted_once_mask);
 }
 
-/* Returns true if SMT is not supported of forcefully (irreversibly) disabled */
+/* Returns true if SMT is supported and not forcefully (irreversibly) disabled */
 bool cpu_smt_possible(void)
 {
        return cpu_smt_control != CPU_SMT_FORCE_DISABLED &&
@@ -650,22 +684,8 @@ bool cpu_smt_possible(void)
 }
 EXPORT_SYMBOL_GPL(cpu_smt_possible);
 
-static inline bool cpuhp_smt_aware(void)
-{
-       return topology_smt_supported();
-}
-
-static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
-{
-       return cpu_primary_thread_mask;
-}
 #else
 static inline bool cpu_smt_allowed(unsigned int cpu) { return true; }
-static inline bool cpuhp_smt_aware(void) { return false; }
-static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
-{
-       return cpu_present_mask;
-}
 #endif
 
 static inline enum cpuhp_state
@@ -1793,6 +1813,16 @@ static int __init parallel_bringup_parse_param(char *arg)
 }
 early_param("cpuhp.parallel", parallel_bringup_parse_param);
 
+static inline bool cpuhp_smt_aware(void)
+{
+       return cpu_smt_max_threads > 1;
+}
+
+static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
+{
+       return cpu_primary_thread_mask;
+}
+
 /*
  * On architectures which have enabled parallel bringup this invokes all BP
  * prepare states for each of the to be onlined APs first. The last state
@@ -2626,6 +2656,12 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval)
        for_each_online_cpu(cpu) {
                if (topology_is_primary_thread(cpu))
                        continue;
+               /*
+                * Disable can be called with CPU_SMT_ENABLED when changing
+                * from a higher to lower number of SMT threads per core.
+                */
+               if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu))
+                       continue;
                ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE);
                if (ret)
                        break;
@@ -2660,6 +2696,8 @@ int cpuhp_smt_enable(void)
                /* Skip online CPUs and CPUs on offline nodes */
                if (cpu_online(cpu) || !node_online(cpu_to_node(cpu)))
                        continue;
+               if (!cpu_smt_thread_allowed(cpu))
+                       continue;
                ret = _cpu_up(cpu, 0, CPUHP_ONLINE);
                if (ret)
                        break;
@@ -2838,20 +2876,19 @@ static const struct attribute_group cpuhp_cpu_root_attr_group = {
 
 #ifdef CONFIG_HOTPLUG_SMT
 
+static bool cpu_smt_num_threads_valid(unsigned int threads)
+{
+       if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC))
+               return threads >= 1 && threads <= cpu_smt_max_threads;
+       return threads == 1 || threads == cpu_smt_max_threads;
+}
+
 static ssize_t
 __store_smt_control(struct device *dev, struct device_attribute *attr,
                    const char *buf, size_t count)
 {
-       int ctrlval, ret;
-
-       if (sysfs_streq(buf, "on"))
-               ctrlval = CPU_SMT_ENABLED;
-       else if (sysfs_streq(buf, "off"))
-               ctrlval = CPU_SMT_DISABLED;
-       else if (sysfs_streq(buf, "forceoff"))
-               ctrlval = CPU_SMT_FORCE_DISABLED;
-       else
-               return -EINVAL;
+       int ctrlval, ret, num_threads, orig_threads;
+       bool force_off;
 
        if (cpu_smt_control == CPU_SMT_FORCE_DISABLED)
                return -EPERM;
@@ -2859,21 +2896,39 @@ __store_smt_control(struct device *dev, struct device_attribute *attr,
        if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
                return -ENODEV;
 
+       if (sysfs_streq(buf, "on")) {
+               ctrlval = CPU_SMT_ENABLED;
+               num_threads = cpu_smt_max_threads;
+       } else if (sysfs_streq(buf, "off")) {
+               ctrlval = CPU_SMT_DISABLED;
+               num_threads = 1;
+       } else if (sysfs_streq(buf, "forceoff")) {
+               ctrlval = CPU_SMT_FORCE_DISABLED;
+               num_threads = 1;
+       } else if (kstrtoint(buf, 10, &num_threads) == 0) {
+               if (num_threads == 1)
+                       ctrlval = CPU_SMT_DISABLED;
+               else if (cpu_smt_num_threads_valid(num_threads))
+                       ctrlval = CPU_SMT_ENABLED;
+               else
+                       return -EINVAL;
+       } else {
+               return -EINVAL;
+       }
+
        ret = lock_device_hotplug_sysfs();
        if (ret)
                return ret;
 
-       if (ctrlval != cpu_smt_control) {
-               switch (ctrlval) {
-               case CPU_SMT_ENABLED:
-                       ret = cpuhp_smt_enable();
-                       break;
-               case CPU_SMT_DISABLED:
-               case CPU_SMT_FORCE_DISABLED:
-                       ret = cpuhp_smt_disable(ctrlval);
-                       break;
-               }
-       }
+       orig_threads = cpu_smt_num_threads;
+       cpu_smt_num_threads = num_threads;
+
+       force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED;
+
+       if (num_threads > orig_threads)
+               ret = cpuhp_smt_enable();
+       else if (num_threads < orig_threads || force_off)
+               ret = cpuhp_smt_disable(ctrlval);
 
        unlock_device_hotplug();
        return ret ? ret : count;
@@ -2901,6 +2956,17 @@ static ssize_t control_show(struct device *dev,
 {
        const char *state = smt_states[cpu_smt_control];
 
+#ifdef CONFIG_HOTPLUG_SMT
+       /*
+        * If SMT is enabled but not all threads are enabled then show the
+        * number of threads. If all threads are enabled show "on". Otherwise
+        * show the state name.
+        */
+       if (cpu_smt_control == CPU_SMT_ENABLED &&
+           cpu_smt_num_threads != cpu_smt_max_threads)
+               return sysfs_emit(buf, "%d\n", cpu_smt_num_threads);
+#endif
+
        return snprintf(buf, PAGE_SIZE - 2, "%s\n", state);
 }
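
With this, /sys/devices/system/cpu/smt/control accepts a per-core thread
count as well as the named states; under CONFIG_SMT_NUM_THREADS_DYNAMIC
(e.g. powerpc) any value in [1, max] is valid, elsewhere only 1 or the
maximum. A hedged sketch of a writer:

#include <stdio.h>

int main(void)
{
        FILE *f = fopen("/sys/devices/system/cpu/smt/control", "we");

        if (!f) {
                perror("smt/control");
                return 1;
        }
        /* "on", "off", "forceoff", or a thread count such as "2" */
        fputs("2\n", f);
        return fclose(f) ? 1 : 0;
}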
 
index be61332..d7ee4bc 100644 (file)
@@ -205,8 +205,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 
        arch_exit_to_user_mode_prepare(regs, ti_work);
 
-       /* Ensure that the address limit is intact and no locks are held */
-       addr_limit_user_check();
+       /* Ensure that kernel state is sane for a return to userspace */
        kmap_assert_nomap();
        lockdep_assert_irqs_disabled();
        lockdep_sys_exit();
index 78ae7b6..93015cb 100644 (file)
@@ -8249,7 +8249,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
        unsigned int size;
 
        memset(comm, 0, sizeof(comm));
-       strlcpy(comm, comm_event->task->comm, sizeof(comm));
+       strscpy(comm, comm_event->task->comm, sizeof(comm));
        size = ALIGN(strlen(comm)+1, sizeof(u64));
 
        comm_event->comm = comm;
@@ -8704,7 +8704,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
        }
 
 cpy_name:
-       strlcpy(tmp, name, sizeof(tmp));
+       strscpy(tmp, name, sizeof(tmp));
        name = tmp;
 got_name:
        /*
@@ -9128,7 +9128,7 @@ void perf_event_ksymbol(u16 ksym_type, u64 addr, u32 len, bool unregister,
            ksym_type == PERF_RECORD_KSYMBOL_TYPE_UNKNOWN)
                goto err;
 
-       strlcpy(name, sym, KSYM_NAME_LEN);
+       strscpy(name, sym, KSYM_NAME_LEN);
        name_len = strlen(name) + 1;
        while (!IS_ALIGNED(name_len, sizeof(u64)))
                name[name_len++] = '\0';
@@ -9595,16 +9595,16 @@ u64 perf_swevent_set_period(struct perf_event *event)
 
        hwc->last_period = hwc->sample_period;
 
-again:
-       old = val = local64_read(&hwc->period_left);
-       if (val < 0)
-               return 0;
+       old = local64_read(&hwc->period_left);
+       do {
+               val = old;
+               if (val < 0)
+                       return 0;
 
-       nr = div64_u64(period + val, period);
-       offset = nr * period;
-       val -= offset;
-       if (local64_cmpxchg(&hwc->period_left, old, val) != old)
-               goto again;
+               nr = div64_u64(period + val, period);
+               offset = nr * period;
+               val -= offset;
+       } while (!local64_try_cmpxchg(&hwc->period_left, &old, val));
 
        return nr;
 }
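
The conversion to local64_try_cmpxchg() keeps the same read-modify-write
shape but lets the primitive refresh the expected value on failure, so the
loop never re-reads by hand. The same loop in portable C11 atomics (a sketch;
assumes period > 0 and no intermediate overflow):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic int64_t period_left;

int64_t consume_periods(int64_t period)
{
        int64_t old = atomic_load(&period_left), val, nr;

        do {
                val = old;
                if (val < 0)
                        return 0;
                nr = (period + val) / period;
                val -= nr * period;
                /* on failure, 'old' is reloaded with the current value */
        } while (!atomic_compare_exchange_weak(&period_left, &old, val));

        return nr;
}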
index a0433f3..fb1e180 100644 (file)
@@ -191,9 +191,10 @@ __perf_output_begin(struct perf_output_handle *handle,
 
        perf_output_get_handle(handle);
 
+       offset = local_read(&rb->head);
        do {
+               head = offset;
                tail = READ_ONCE(rb->user_page->data_tail);
-               offset = head = local_read(&rb->head);
                if (!rb->overwrite) {
                        if (unlikely(!ring_buffer_has_space(head, tail,
                                                            perf_data_size(rb),
@@ -217,7 +218,7 @@ __perf_output_begin(struct perf_output_handle *handle,
                        head += size;
                else
                        head -= size;
-       } while (local_cmpxchg(&rb->head, offset, head) != offset);
+       } while (!local_try_cmpxchg(&rb->head, &offset, head));
 
        if (backward) {
                offset = head;
index d2e12b6..f811497 100644 (file)
@@ -985,6 +985,14 @@ void __put_task_struct(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(__put_task_struct);
 
+void __put_task_struct_rcu_cb(struct rcu_head *rhp)
+{
+       struct task_struct *task = container_of(rhp, struct task_struct, rcu);
+
+       __put_task_struct(task);
+}
+EXPORT_SYMBOL_GPL(__put_task_struct_rcu_cb);
+
 void __init __weak arch_task_cache_init(void) { }
 
 /*
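
The callback is the usual rcu_head idiom: recover the enclosing object with
container_of(), then run the normal destructor. A self-contained mock (no
real RCU involved; the types and the direct callback invocation are stand-ins
for call_rcu() and a grace period):

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head { void (*func)(struct rcu_head *); };
struct task { int pid; struct rcu_head rcu; };

static void put_task_cb(struct rcu_head *rhp)
{
        struct task *t = container_of(rhp, struct task, rcu);

        printf("freeing task %d\n", t->pid);
        free(t);
}

int main(void)
{
        struct task *t = malloc(sizeof(*t));

        t->pid = 1;
        t->rcu.func = put_task_cb;
        t->rcu.func(&t->rcu);   /* call_rcu() would defer this past a grace period */
        return 0;
}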
index ee8c0ac..dc94e0b 100644 (file)
@@ -473,11 +473,12 @@ void handle_nested_irq(unsigned int irq)
        action = desc->action;
        if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
                desc->istate |= IRQS_PENDING;
-               goto out_unlock;
+               raw_spin_unlock_irq(&desc->lock);
+               return;
        }
 
        kstat_incr_irqs_this_cpu(desc);
-       irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
+       atomic_inc(&desc->threads_active);
        raw_spin_unlock_irq(&desc->lock);
 
        action_ret = IRQ_NONE;
@@ -487,11 +488,7 @@ void handle_nested_irq(unsigned int irq)
        if (!irq_settings_no_debug(desc))
                note_interrupt(desc, action_ret);
 
-       raw_spin_lock_irq(&desc->lock);
-       irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
-
-out_unlock:
-       raw_spin_unlock_irq(&desc->lock);
+       wake_threads_waitq(desc);
 }
 EXPORT_SYMBOL_GPL(handle_nested_irq);
 
index bdd35bb..bcc7f21 100644 (file)
@@ -108,8 +108,6 @@ extern int __irq_get_irqchip_state(struct irq_data *data,
                                   enum irqchip_irq_state which,
                                   bool *state);
 
-extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
-
 irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc);
 irqreturn_t handle_irq_event_percpu(struct irq_desc *desc);
 irqreturn_t handle_irq_event(struct irq_desc *desc);
@@ -121,6 +119,8 @@ void irq_resend_init(struct irq_desc *desc);
 bool irq_wait_for_poll(struct irq_desc *desc);
 void __irq_wake_thread(struct irq_desc *desc, struct irqaction *action);
 
+void wake_threads_waitq(struct irq_desc *desc);
+
 #ifdef CONFIG_PROC_FS
 extern void register_irq_proc(unsigned int irq, struct irq_desc *desc);
 extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
index d2742af..d309ba8 100644 (file)
@@ -108,6 +108,16 @@ bool synchronize_hardirq(unsigned int irq)
 }
 EXPORT_SYMBOL(synchronize_hardirq);
 
+static void __synchronize_irq(struct irq_desc *desc)
+{
+       __synchronize_hardirq(desc, true);
+       /*
+        * We made sure that no hardirq handler is running. Now verify that no
+        * threaded handlers are active.
+        */
+       wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
+}
+
 /**
  *     synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *     @irq: interrupt number to wait for
@@ -127,16 +137,8 @@ void synchronize_irq(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       if (desc) {
-               __synchronize_hardirq(desc, true);
-               /*
-                * We made sure that no hardirq handler is
-                * running. Now verify that no threaded handlers are
-                * active.
-                */
-               wait_event(desc->wait_for_threads,
-                          !atomic_read(&desc->threads_active));
-       }
+       if (desc)
+               __synchronize_irq(desc);
 }
 EXPORT_SYMBOL(synchronize_irq);
 
@@ -1216,7 +1218,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,
        return ret;
 }
 
-static void wake_threads_waitq(struct irq_desc *desc)
+void wake_threads_waitq(struct irq_desc *desc)
 {
        if (atomic_dec_and_test(&desc->threads_active))
                wake_up(&desc->wait_for_threads);
@@ -1944,7 +1946,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
         * supports it also make sure that there is no (not yet serviced)
         * interrupt in flight at the hardware level.
         */
-       __synchronize_hardirq(desc, true);
+       __synchronize_irq(desc);
 
 #ifdef CONFIG_DEBUG_SHIRQ
        /*
index edec335..5f2c668 100644 (file)
@@ -68,11 +68,16 @@ static int irq_sw_resend(struct irq_desc *desc)
                 */
                if (!desc->parent_irq)
                        return -EINVAL;
+
+               desc = irq_to_desc(desc->parent_irq);
+               if (!desc)
+                       return -EINVAL;
        }
 
        /* Add to resend_list and activate the softirq: */
        raw_spin_lock(&irq_resend_lock);
-       hlist_add_head(&desc->resend_node, &irq_resend_list);
+       if (hlist_unhashed(&desc->resend_node))
+               hlist_add_head(&desc->resend_node, &irq_resend_list);
        raw_spin_unlock(&irq_resend_lock);
        tasklet_schedule(&resend_tasklet);
        return 0;
index 7982cc9..18edd57 100644 (file)
@@ -163,42 +163,36 @@ unsigned long kallsyms_sym_address(int idx)
        return kallsyms_relative_base - 1 - kallsyms_offsets[idx];
 }
 
-static bool cleanup_symbol_name(char *s)
+static void cleanup_symbol_name(char *s)
 {
        char *res;
 
        if (!IS_ENABLED(CONFIG_LTO_CLANG))
-               return false;
+               return;
 
        /*
         * LLVM appends various suffixes for local functions and variables that
         * must be promoted to global scope as part of LTO.  This can break
         * hooking of static functions with kprobes. '.' is not a valid
-        * character in an identifier in C. Suffixes observed:
+        * character in an identifier in C. Suffixes observed only with LLVM LTO:
         * - foo.llvm.[0-9a-f]+
-        * - foo.[0-9a-f]+
         */
-       res = strchr(s, '.');
-       if (res) {
+       res = strstr(s, ".llvm.");
+       if (res)
                *res = '\0';
-               return true;
-       }
 
-       return false;
+       return;
 }
 
 static int compare_symbol_name(const char *name, char *namebuf)
 {
-       int ret;
-
-       ret = strcmp(name, namebuf);
-       if (!ret)
-               return ret;
-
-       if (cleanup_symbol_name(namebuf) && !strcmp(name, namebuf))
-               return 0;
-
-       return ret;
+       /* The kallsyms_seqs_of_names table is sorted by name after
+        * cleanup_symbol_name() (see scripts/kallsyms.c) if Clang LTO is enabled.
+        * To ensure correct bisection in kallsyms_lookup_names(), do
+        * cleanup_symbol_name(namebuf) before comparing name and namebuf.
+        */
+       cleanup_symbol_name(namebuf);
+       return strcmp(name, namebuf);
 }
 
 static unsigned int get_symbol_seq(int index)
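
The lookup-side effect of the change, in miniature: truncate at the first
".llvm." marker and the promoted symbol compares equal to its original name
(userspace sketch; the suffix value is invented):

#include <stdio.h>
#include <string.h>

static void cleanup_symbol_name(char *s)
{
        char *res = strstr(s, ".llvm.");

        if (res)
                *res = '\0';
}

int main(void)
{
        char namebuf[] = "foo.llvm.9764194104327452881";

        cleanup_symbol_name(namebuf);
        printf("%d\n", strcmp("foo", namebuf)); /* 0: bisection now matches */
        return 0;
}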
index a2e3745..e05ddc3 100644 (file)
@@ -196,7 +196,7 @@ static bool match_cleanup_name(const char *s, const char *name)
        if (!IS_ENABLED(CONFIG_LTO_CLANG))
                return false;
 
-       p = strchr(s, '.');
+       p = strstr(s, ".llvm.");
        if (!p)
                return false;
 
@@ -344,27 +344,6 @@ static int test_kallsyms_basic_function(void)
                        goto failed;
                }
 
-               /*
-                * The first '.' may be the initial letter, in which case the
-                * entire symbol name will be truncated to an empty string in
-                * cleanup_symbol_name(). Do not test these symbols.
-                *
-                * For example:
-                * cat /proc/kallsyms | awk '{print $3}' | grep -E "^\." | head
-                * .E_read_words
-                * .E_leading_bytes
-                * .E_trailing_bytes
-                * .E_write_words
-                * .E_copy
-                * .str.292.llvm.12122243386960820698
-                * .str.24.llvm.12122243386960820698
-                * .str.29.llvm.12122243386960820698
-                * .str.75.llvm.12122243386960820698
-                * .str.99.llvm.12122243386960820698
-                */
-               if (IS_ENABLED(CONFIG_LTO_CLANG) && !namebuf[0])
-                       continue;
-
                lookup_addr = kallsyms_lookup_name(namebuf);
 
                memset(stat, 0, sizeof(*stat));
index ce13f1a..ca385b6 100644 (file)
@@ -1072,7 +1072,7 @@ static int kprobe_ftrace_enabled;
 static int __arm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
                               int *cnt)
 {
-       int ret = 0;
+       int ret;
 
        lockdep_assert_held(&kprobe_mutex);
 
@@ -1110,7 +1110,7 @@ static int arm_kprobe_ftrace(struct kprobe *p)
 static int __disarm_kprobe_ftrace(struct kprobe *p, struct ftrace_ops *ops,
                                  int *cnt)
 {
-       int ret = 0;
+       int ret;
 
        lockdep_assert_held(&kprobe_mutex);
 
@@ -1545,6 +1545,17 @@ static int check_ftrace_location(struct kprobe *p)
        return 0;
 }
 
+static bool is_cfi_preamble_symbol(unsigned long addr)
+{
+       char symbuf[KSYM_NAME_LEN];
+
+       if (lookup_symbol_name(addr, symbuf))
+               return false;
+
+       return str_has_prefix(symbuf, "__cfi_") ||
+               str_has_prefix(symbuf, "__pfx_");
+}
+
 static int check_kprobe_address_safe(struct kprobe *p,
                                     struct module **probed_mod)
 {
@@ -1563,7 +1574,8 @@ static int check_kprobe_address_safe(struct kprobe *p,
            within_kprobe_blacklist((unsigned long) p->addr) ||
            jump_label_text_reserved(p->addr, p->addr) ||
            static_call_text_reserved(p->addr, p->addr) ||
-           find_bug((unsigned long)p->addr)) {
+           find_bug((unsigned long)p->addr) ||
+           is_cfi_preamble_symbol((unsigned long)p->addr)) {
                ret = -EINVAL;
                goto out;
        }
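
Note the argument order relied on above: str_has_prefix(str, prefix) returns
the prefix length when str starts with prefix, else 0. A minimal userspace
equivalent showing why swapping the arguments would defeat the check:

#include <stdio.h>
#include <string.h>

static size_t str_has_prefix(const char *str, const char *prefix)
{
        size_t len = strlen(prefix);

        return strncmp(str, prefix, len) == 0 ? len : 0;
}

int main(void)
{
        printf("%zu\n", str_has_prefix("__cfi_foo", "__cfi_")); /* 6: match */
        printf("%zu\n", str_has_prefix("__cfi_", "__cfi_foo")); /* 0: no match */
        return 0;
}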
@@ -2007,9 +2019,9 @@ void __weak arch_kretprobe_fixup_return(struct pt_regs *regs,
 unsigned long __kretprobe_trampoline_handler(struct pt_regs *regs,
                                             void *frame_pointer)
 {
-       kprobe_opcode_t *correct_ret_addr = NULL;
        struct kretprobe_instance *ri = NULL;
        struct llist_node *first, *node = NULL;
+       kprobe_opcode_t *correct_ret_addr;
        struct kretprobe *rp;
 
        /* Find correct address and all nodes for this frame. */
@@ -2693,7 +2705,7 @@ void kprobe_free_init_mem(void)
 
 static int __init init_kprobes(void)
 {
-       int i, err = 0;
+       int i, err;
 
        /* FIXME allocate the probe table, currently defined statically */
        /* initialize all list heads */
index 949d3de..270c7f8 100644 (file)
@@ -45,6 +45,7 @@ torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable");
 torture_param(int, rt_boost, 2,
                   "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types.");
 torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens.");
+torture_param(int, writer_fifo, 0, "Run writers at sched_set_fifo() priority");
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)");
 /* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */
@@ -809,7 +810,8 @@ static int lock_torture_writer(void *arg)
        bool skip_main_lock;
 
        VERBOSE_TOROUT_STRING("lock_torture_writer task started");
-       set_user_nice(current, MAX_NICE);
+       if (!rt_task(current))
+               set_user_nice(current, MAX_NICE);
 
        do {
                if ((torture_random(&rand) & 0xfffff) == 0)
@@ -1015,8 +1017,7 @@ static void lock_torture_cleanup(void)
 
        if (writer_tasks) {
                for (i = 0; i < cxt.nrealwriters_stress; i++)
-                       torture_stop_kthread(lock_torture_writer,
-                                            writer_tasks[i]);
+                       torture_stop_kthread(lock_torture_writer, writer_tasks[i]);
                kfree(writer_tasks);
                writer_tasks = NULL;
        }
@@ -1244,8 +1245,9 @@ static int __init lock_torture_init(void)
                        goto create_reader;
 
                /* Create writer. */
-               firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
-                                                 writer_tasks[i]);
+               firsterr = torture_create_kthread_cb(lock_torture_writer, &cxt.lwsa[i],
+                                                    writer_tasks[i],
+                                                    writer_fifo ? sched_set_fifo : NULL);
                if (torture_init_error(firsterr))
                        goto unwind;
 
index 6afc249..6a0184e 100644 (file)
@@ -486,6 +486,16 @@ gotlock:
 }
 
 /*
+ * Include the architecture specific callee-save thunk of the
+ * __pv_queued_spin_unlock(). This thunk is put together with
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
+ */
+#include <asm/qspinlock_paravirt.h>
+
+/*
  * PV versions of the unlock fastpath and slowpath functions to be used
  * instead of queued_spin_unlock().
  */
@@ -533,16 +543,6 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
        pv_kick(node->cpu);
 }
 
-/*
- * Include the architecture specific callee-save thunk of the
- * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
- * function close to each other sharing consecutive instruction cachelines.
- * Alternatively, architecture specific version of __pv_queued_spin_unlock()
- * can be defined.
- */
-#include <asm/qspinlock_paravirt.h>
-
 #ifndef __pv_queued_spin_unlock
 __visible __lockfunc void __pv_queued_spin_unlock(struct qspinlock *lock)
 {
index 728f434..21db0df 100644 (file)
@@ -333,21 +333,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
        return prio;
 }
 
+/*
+ * Update the waiter->tree copy of the sort keys.
+ */
 static __always_inline void
 waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
 {
-       waiter->prio = __waiter_prio(task);
-       waiter->deadline = task->dl.deadline;
+       lockdep_assert_held(&waiter->lock->wait_lock);
+       lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
+
+       waiter->tree.prio = __waiter_prio(task);
+       waiter->tree.deadline = task->dl.deadline;
+}
+
+/*
+ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
+ */
+static __always_inline void
+waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+{
+       lockdep_assert_held(&waiter->lock->wait_lock);
+       lockdep_assert_held(&task->pi_lock);
+       lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
+
+       waiter->pi_tree.prio = waiter->tree.prio;
+       waiter->pi_tree.deadline = waiter->tree.deadline;
 }
 
 /*
- * Only use with rt_mutex_waiter_{less,equal}()
+ * Only use with rt_waiter_node_{less,equal}()
  */
+#define task_to_waiter_node(p) \
+       &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
 #define task_to_waiter(p)      \
-       &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+       &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
 
-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
-                                               struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
+                                              struct rt_waiter_node *right)
 {
        if (left->prio < right->prio)
                return 1;
@@ -364,8 +386,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
        return 0;
 }
 
-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
-                                                struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
+                                                struct rt_waiter_node *right)
 {
        if (left->prio != right->prio)
                return 0;
@@ -385,7 +407,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
 static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
                                  struct rt_mutex_waiter *top_waiter)
 {
-       if (rt_mutex_waiter_less(waiter, top_waiter))
+       if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
                return true;
 
 #ifdef RT_MUTEX_BUILD_SPINLOCKS
@@ -393,30 +415,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
         * Note that RT tasks are excluded from same priority (lateral)
         * steals to prevent the introduction of an unbounded latency.
         */
-       if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+       if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
                return false;
 
-       return rt_mutex_waiter_equal(waiter, top_waiter);
+       return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
 #else
        return false;
 #endif
 }
 
 #define __node_2_waiter(node) \
-       rb_entry((node), struct rt_mutex_waiter, tree_entry)
+       rb_entry((node), struct rt_mutex_waiter, tree.entry)
 
 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
 {
        struct rt_mutex_waiter *aw = __node_2_waiter(a);
        struct rt_mutex_waiter *bw = __node_2_waiter(b);
 
-       if (rt_mutex_waiter_less(aw, bw))
+       if (rt_waiter_node_less(&aw->tree, &bw->tree))
                return 1;
 
        if (!build_ww_mutex())
                return 0;
 
-       if (rt_mutex_waiter_less(bw, aw))
+       if (rt_waiter_node_less(&bw->tree, &aw->tree))
                return 0;
 
        /* NOTE: relies on waiter->ww_ctx being set before insertion */
@@ -434,48 +456,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
 static __always_inline void
 rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
 {
-       rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
+       lockdep_assert_held(&lock->wait_lock);
+
+       rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
 }
 
 static __always_inline void
 rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
 {
-       if (RB_EMPTY_NODE(&waiter->tree_entry))
+       lockdep_assert_held(&lock->wait_lock);
+
+       if (RB_EMPTY_NODE(&waiter->tree.entry))
                return;
 
-       rb_erase_cached(&waiter->tree_entry, &lock->waiters);
-       RB_CLEAR_NODE(&waiter->tree_entry);
+       rb_erase_cached(&waiter->tree.entry, &lock->waiters);
+       RB_CLEAR_NODE(&waiter->tree.entry);
 }
 
-#define __node_2_pi_waiter(node) \
-       rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
+#define __node_2_rt_node(node) \
+       rb_entry((node), struct rt_waiter_node, entry)
 
-static __always_inline bool
-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
 {
-       return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
+       return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
 }
 
 static __always_inline void
 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
-       rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
+       lockdep_assert_held(&task->pi_lock);
+
+       rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
 }
 
 static __always_inline void
 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
-       if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
+       lockdep_assert_held(&task->pi_lock);
+
+       if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
                return;
 
-       rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
-       RB_CLEAR_NODE(&waiter->pi_tree_entry);
+       rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
+       RB_CLEAR_NODE(&waiter->pi_tree.entry);
 }
 
-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
+static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
+                                                struct task_struct *p)
 {
        struct task_struct *pi_task = NULL;
 
+       lockdep_assert_held(&lock->wait_lock);
+       lockdep_assert(rt_mutex_owner(lock) == p);
        lockdep_assert_held(&p->pi_lock);
 
        if (task_has_pi_waiters(p))
@@ -571,9 +603,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
  * Chain walk basics and protection scope
  *
  * [R] refcount on task
- * [P] task->pi_lock held
+ * [Pn] task->pi_lock held
  * [L] rtmutex->wait_lock held
  *
+ * Normal locking order:
+ *
+ *   rtmutex->wait_lock
+ *     task->pi_lock
+ *
  * Step        Description                             Protected by
  *     function arguments:
  *     @task                                   [R]
@@ -588,27 +625,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
  *     again:
  *       loop_sanity_check();
  *     retry:
- * [1]   lock(task->pi_lock);                  [R] acquire [P]
- * [2]   waiter = task->pi_blocked_on;         [P]
- * [3]   check_exit_conditions_1();            [P]
- * [4]   lock = waiter->lock;                  [P]
- * [5]   if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
- *         unlock(task->pi_lock);              release [P]
+ * [1]   lock(task->pi_lock);                  [R] acquire [P1]
+ * [2]   waiter = task->pi_blocked_on;         [P1]
+ * [3]   check_exit_conditions_1();            [P1]
+ * [4]   lock = waiter->lock;                  [P1]
+ * [5]   if (!try_lock(lock->wait_lock)) {     [P1] try to acquire [L]
+ *         unlock(task->pi_lock);              release [P1]
  *         goto retry;
  *       }
- * [6]   check_exit_conditions_2();            [P] + [L]
- * [7]   requeue_lock_waiter(lock, waiter);    [P] + [L]
- * [8]   unlock(task->pi_lock);                release [P]
+ * [6]   check_exit_conditions_2();            [P1] + [L]
+ * [7]   requeue_lock_waiter(lock, waiter);    [P1] + [L]
+ * [8]   unlock(task->pi_lock);                release [P1]
  *       put_task_struct(task);                release [R]
  * [9]   check_exit_conditions_3();            [L]
  * [10]          task = owner(lock);                   [L]
  *       get_task_struct(task);                [L] acquire [R]
- *       lock(task->pi_lock);                  [L] acquire [P]
- * [11]          requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
- * [12]          check_exit_conditions_4();            [P] + [L]
- * [13]          unlock(task->pi_lock);                release [P]
+ *       lock(task->pi_lock);                  [L] acquire [P2]
+ * [11]          requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
+ * [12]          check_exit_conditions_4();            [P2] + [L]
+ * [13]          unlock(task->pi_lock);                release [P2]
  *       unlock(lock->wait_lock);              release [L]
  *       goto again;
+ *
+ * Where P1 is the blocking task and P2 is the lock owner; going up one step
+ * the owner becomes the next blocked task etc.
+ *
  */
 static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                                              enum rtmutex_chainwalk chwalk,
@@ -756,7 +798,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
         * enabled we continue, but stop the requeueing in the chain
         * walk.
         */
-       if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+       if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
                if (!detect_deadlock)
                        goto out_unlock_pi;
                else
@@ -764,13 +806,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
        }
 
        /*
-        * [4] Get the next lock
+        * [4] Get the next lock; since we hold task->pi_lock, @task cannot
+        * unblock, which guarantees that @lock continues to exist.
         */
        lock = waiter->lock;
        /*
         * [5] We need to trylock here as we are holding task->pi_lock,
         * which is the reverse lock order versus the other rtmutex
         * operations.
+        *
+        * As noted above, holding task->pi_lock is what guarantees @lock's
+        * existence, so this lock order cannot simply be inverted without
+        * losing that lifetime guarantee.
         */
        if (!raw_spin_trylock(&lock->wait_lock)) {
                raw_spin_unlock_irq(&task->pi_lock);
@@ -874,17 +921,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
         * or
         *
         *   DL CBS enforcement advancing the effective deadline.
-        *
-        * Even though pi_waiters also uses these fields, and that tree is only
-        * updated in [11], we can do this here, since we hold [L], which
-        * serializes all pi_waiters access and rb_erase() does not care about
-        * the values of the node being removed.
         */
        waiter_update_prio(waiter, task);
 
        rt_mutex_enqueue(lock, waiter);
 
-       /* [8] Release the task */
+       /*
+        * [8] Release the (blocking) task in preparation for
+        * taking the owner task in [10].
+        *
+        * Since we hold lock->wait_lock, task cannot unblock, even if we
+        * release task->pi_lock.
+        */
        raw_spin_unlock(&task->pi_lock);
        put_task_struct(task);
 
@@ -908,7 +956,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                return 0;
        }
 
-       /* [10] Grab the next task, i.e. the owner of @lock */
+       /*
+        * [10] Grab the next task, i.e. the owner of @lock
+        *
+        * Since we hold lock->wait_lock and checked for !owner above, there
+        * must be an owner and it cannot go away.
+        */
        task = get_task_struct(rt_mutex_owner(lock));
        raw_spin_lock(&task->pi_lock);
 
@@ -921,8 +974,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                 * and adjust the priority of the owner.
                 */
                rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+               waiter_clone_prio(waiter, task);
                rt_mutex_enqueue_pi(task, waiter);
-               rt_mutex_adjust_prio(task);
+               rt_mutex_adjust_prio(lock, task);
 
        } else if (prerequeue_top_waiter == waiter) {
                /*
@@ -937,8 +991,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
                 */
                rt_mutex_dequeue_pi(task, waiter);
                waiter = rt_mutex_top_waiter(lock);
+               waiter_clone_prio(waiter, task);
                rt_mutex_enqueue_pi(task, waiter);
-               rt_mutex_adjust_prio(task);
+               rt_mutex_adjust_prio(lock, task);
        } else {
                /*
                 * Nothing changed. No need to do any priority
@@ -1154,6 +1209,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
        waiter->task = task;
        waiter->lock = lock;
        waiter_update_prio(waiter, task);
+       waiter_clone_prio(waiter, task);
 
        /* Get the top priority waiter on the lock */
        if (rt_mutex_has_waiters(lock))
@@ -1187,7 +1243,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
                rt_mutex_dequeue_pi(owner, top_waiter);
                rt_mutex_enqueue_pi(owner, waiter);
 
-               rt_mutex_adjust_prio(owner);
+               rt_mutex_adjust_prio(lock, owner);
                if (owner->pi_blocked_on)
                        chain_walk = 1;
        } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1234,6 +1290,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
 {
        struct rt_mutex_waiter *waiter;
 
+       lockdep_assert_held(&lock->wait_lock);
+
        raw_spin_lock(&current->pi_lock);
 
        waiter = rt_mutex_top_waiter(lock);
@@ -1246,7 +1304,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
         * task unblocks.
         */
        rt_mutex_dequeue_pi(current, waiter);
-       rt_mutex_adjust_prio(current);
+       rt_mutex_adjust_prio(lock, current);
 
        /*
         * As we are waking up the top waiter, and the waiter stays
@@ -1482,7 +1540,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
        if (rt_mutex_has_waiters(lock))
                rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
 
-       rt_mutex_adjust_prio(owner);
+       rt_mutex_adjust_prio(lock, owner);
 
        /* Store the lock on which owner is blocked or NULL */
        next_lock = task_blocked_on_lock(owner);
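The heart of the rtmutex changes above is that each waiter now carries two independent copies of its sort keys, refreshed under different locks: waiter_update_prio() touches only the tree copy under wait_lock, and waiter_clone_prio() snapshots it into the pi_tree copy under the owner's pi_lock. A minimal userspace sketch of that two-copy scheme (illustrative names, not part of the patch):

#include <stdio.h>

struct sort_node {
	int prio;
	unsigned long long deadline;
};

struct two_tree_waiter {
	struct sort_node tree;		/* keys for the per-lock waiter tree */
	struct sort_node pi_tree;	/* keys for the per-owner pi waiters tree */
};

/* Analogue of waiter_update_prio(): refresh only the tree copy. */
static void update_tree_keys(struct two_tree_waiter *w, int prio,
			     unsigned long long deadline)
{
	w->tree.prio = prio;
	w->tree.deadline = deadline;
}

/* Analogue of waiter_clone_prio(): snapshot the tree copy into pi_tree. */
static void clone_pi_keys(struct two_tree_waiter *w)
{
	w->pi_tree = w->tree;
}

int main(void)
{
	struct two_tree_waiter w = { { 0 }, { 0 } };

	update_tree_keys(&w, 10, 1000);
	clone_pi_keys(&w);
	update_tree_keys(&w, 5, 500);	/* the tree copy moves on... */
	printf("tree.prio=%d pi_tree.prio=%d\n",
	       w.tree.prio, w.pi_tree.prio);	/* ...the pi_tree copy does not */
	return 0;
}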
index cb9fdff..a6974d0 100644 (file)
@@ -459,7 +459,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task)
        raw_spin_lock_irqsave(&task->pi_lock, flags);
 
        waiter = task->pi_blocked_on;
-       if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+       if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
                raw_spin_unlock_irqrestore(&task->pi_lock, flags);
                return;
        }
index c47e836..1162e07 100644 (file)
 #include <linux/rtmutex.h>
 #include <linux/sched/wake_q.h>
 
+
+/*
+ * This is a helper for the struct rt_mutex_waiter below. A waiter sits in
+ * two separate trees, and each tree needs its own copy of the sort keys
+ * because of its different locking requirements.
+ *
+ * @entry:             rbtree node to enqueue into the waiters tree
+ * @prio:              Priority of the waiter
+ * @deadline:          Deadline of the waiter if applicable
+ *
+ * See rt_waiter_node_less() and waiter_*_prio().
+ */
+struct rt_waiter_node {
+       struct rb_node  entry;
+       int             prio;
+       u64             deadline;
+};
+
 /*
  * This is the control structure for tasks blocked on a rt_mutex,
  * which is allocated on the kernel stack of the blocked task.
  *
- * @tree_entry:                pi node to enqueue into the mutex waiters tree
- * @pi_tree_entry:     pi node to enqueue into the mutex owner waiters tree
+ * @tree:              node to enqueue into the mutex waiters tree
+ * @pi_tree:           node to enqueue into the mutex owner waiters tree
  * @task:              task reference to the blocked task
  * @lock:              Pointer to the rt_mutex on which the waiter blocks
  * @wake_state:                Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
- * @prio:              Priority of the waiter
- * @deadline:          Deadline of the waiter if applicable
  * @ww_ctx:            WW context pointer
+ *
+ * @tree is protected by @lock->wait_lock
+ * @pi_tree is protected by rt_mutex_owner(@lock)->pi_lock
  */
 struct rt_mutex_waiter {
-       struct rb_node          tree_entry;
-       struct rb_node          pi_tree_entry;
+       struct rt_waiter_node   tree;
+       struct rt_waiter_node   pi_tree;
        struct task_struct      *task;
        struct rt_mutex_base    *lock;
        unsigned int            wake_state;
-       int                     prio;
-       u64                     deadline;
        struct ww_acquire_ctx   *ww_ctx;
 };
 
@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
 {
        struct rb_node *leftmost = rb_first_cached(&lock->waiters);
 
-       return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
+       return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter;
 }
 
 static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *
        struct rb_node *leftmost = rb_first_cached(&lock->waiters);
        struct rt_mutex_waiter *w = NULL;
 
+       lockdep_assert_held(&lock->wait_lock);
+
        if (leftmost) {
-               w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
+               w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry);
                BUG_ON(w->lock != lock);
        }
        return w;
@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p)
 
 static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
 {
+       lockdep_assert_held(&p->pi_lock);
+
        return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter,
-                       pi_tree_entry);
+                       pi_tree.entry);
 }
 
 #define RT_MUTEX_HAS_WAITERS   1UL
@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
 {
        debug_rt_mutex_init_waiter(waiter);
-       RB_CLEAR_NODE(&waiter->pi_tree_entry);
-       RB_CLEAR_NODE(&waiter->tree_entry);
+       RB_CLEAR_NODE(&waiter->pi_tree.entry);
+       RB_CLEAR_NODE(&waiter->tree.entry);
        waiter->wake_state = TASK_NORMAL;
        waiter->task = NULL;
 }
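One detail worth noting: the switch from a flat tree_entry member to the nested tree.entry keeps rb_entry() working unchanged, because offsetof() (and hence container_of()) accepts nested member designators. A standalone sketch, with illustrative names:

#include <stddef.h>
#include <stdio.h>

struct rbnode { struct rbnode *left, *right; };

struct waiter {
	struct { struct rbnode entry; int prio; } tree;
	struct { struct rbnode entry; int prio; } pi_tree;
};

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct waiter w;
	struct rbnode *n = &w.pi_tree.entry;

	/* Recover the waiter from its embedded pi_tree node. */
	struct waiter *back = container_of(n, struct waiter, pi_tree.entry);

	printf("%s\n", back == &w ? "recovered" : "broken");
	return 0;
}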
index 56f1392..3ad2cc4 100644 (file)
@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock)
        struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
        if (!n)
                return NULL;
-       return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+       return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline struct rt_mutex_waiter *
 __ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
 {
-       struct rb_node *n = rb_next(&w->tree_entry);
+       struct rb_node *n = rb_next(&w->tree.entry);
        if (!n)
                return NULL;
-       return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+       return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline struct rt_mutex_waiter *
 __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
 {
-       struct rb_node *n = rb_prev(&w->tree_entry);
+       struct rb_node *n = rb_prev(&w->tree.entry);
        if (!n)
                return NULL;
-       return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+       return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline struct rt_mutex_waiter *
@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock)
        struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
        if (!n)
                return NULL;
-       return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+       return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline void
index 80d9c6d..15781ac 100644 (file)
@@ -30,7 +30,7 @@
 static struct kmem_cache *nsproxy_cachep;
 
 struct nsproxy init_nsproxy = {
-       .count                  = ATOMIC_INIT(1),
+       .count                  = REFCOUNT_INIT(1),
        .uts_ns                 = &init_uts_ns,
 #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
        .ipc_ns                 = &init_ipc_ns,
@@ -55,7 +55,7 @@ static inline struct nsproxy *create_nsproxy(void)
 
        nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
        if (nsproxy)
-               atomic_set(&nsproxy->count, 1);
+               refcount_set(&nsproxy->count, 1);
        return nsproxy;
 }
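For context, the atomic_t to refcount_t conversion above buys saturation semantics: a counter that would over- or underflow is pinned at its ceiling instead of wrapping into a bogus value that could free a live object. A simplified userspace model of those semantics (not the kernel implementation):

#include <limits.h>
#include <stdio.h>

static unsigned int count = 1;		/* one reference held at init time */

static void ref_get(void)
{
	if (count == UINT_MAX)		/* saturated: stay pinned */
		return;
	count++;
}

/* Returns 1 when the last reference is dropped and the object may be freed. */
static int ref_put(void)
{
	if (count == UINT_MAX)		/* saturated objects are never freed */
		return 0;
	return --count == 0;
}

int main(void)
{
	ref_get();
	printf("free now? %d\n", ref_put());	/* 0: one reference remains */
	printf("free now? %d\n", ref_put());	/* 1: last reference dropped */
	return 0;
}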
 
index f62e89d..2b4a946 100644 (file)
@@ -1166,7 +1166,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
        int error;
 
        if (!hibernation_available())
-               return 0;
+               return n;
 
        if (len && buf[len-1] == '\n')
                len--;
@@ -1179,6 +1179,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
                unsigned maj, min, offset;
                char *p, dummy;
 
+               error = 0;
                if (sscanf(name, "%u:%u%c", &maj, &min, &dummy) == 2 ||
                    sscanf(name, "%u:%u:%u:%c", &maj, &min, &offset,
                                &dummy) == 3) {
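The `return n` fix above matters because a sysfs ->store() callback reports how many bytes of the write it consumed; returning 0 leaves a retrying writer spinning forever. A small userspace model of that contract (hypothetical helper names):

#include <stdio.h>
#include <string.h>

/* A ->store() that reports "nothing consumed"... */
static long store_buggy(const char *buf, long n)
{
	(void)buf; (void)n;
	return 0;
}

/* ...versus one that swallows the input, as the fix above does. */
static long store_fixed(const char *buf, long n)
{
	(void)buf;
	return n;
}

int main(void)
{
	const char *msg = "8:3\n";	/* arbitrary maj:min sample */
	long left = (long)strlen(msg);

	while (left > 0) {		/* a typical retrying writer */
		long ret = store_fixed(msg, left);	/* store_buggy() here never exits */
		if (ret < 0)
			break;
		left -= ret;
	}
	printf("write finished\n");
	return 0;
}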
index af51ed6..4244b06 100644 (file)
@@ -220,6 +220,11 @@ static struct pm_qos_constraints cpu_latency_constraints = {
        .type = PM_QOS_MIN,
 };
 
+static inline bool cpu_latency_qos_value_invalid(s32 value)
+{
+       return value < 0 && value != PM_QOS_DEFAULT_VALUE;
+}
+
 /**
  * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit.
  */
@@ -263,7 +268,7 @@ static void cpu_latency_qos_apply(struct pm_qos_request *req,
  */
 void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value)
 {
-       if (!req)
+       if (!req || cpu_latency_qos_value_invalid(value))
                return;
 
        if (cpu_latency_qos_request_active(req)) {
@@ -289,7 +294,7 @@ EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request);
  */
 void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value)
 {
-       if (!req)
+       if (!req || cpu_latency_qos_value_invalid(new_value))
                return;
 
        if (!cpu_latency_qos_request_active(req)) {
@@ -426,6 +431,11 @@ late_initcall(cpu_latency_qos_init);
 
 /* Definitions related to the frequency QoS below. */
 
+static inline bool freq_qos_value_invalid(s32 value)
+{
+       return value < 0 && value != PM_QOS_DEFAULT_VALUE;
+}
+
 /**
  * freq_constraints_init - Initialize frequency QoS constraints.
  * @qos: Frequency QoS constraints to initialize.
@@ -531,7 +541,7 @@ int freq_qos_add_request(struct freq_constraints *qos,
 {
        int ret;
 
-       if (IS_ERR_OR_NULL(qos) || !req || value < 0)
+       if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value))
                return -EINVAL;
 
        if (WARN(freq_qos_request_active(req),
@@ -563,7 +573,7 @@ EXPORT_SYMBOL_GPL(freq_qos_add_request);
  */
 int freq_qos_update_request(struct freq_qos_request *req, s32 new_value)
 {
-       if (!req || new_value < 0)
+       if (!req || freq_qos_value_invalid(new_value))
                return -EINVAL;
 
        if (WARN(!freq_qos_request_active(req),
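Both new *_value_invalid() helpers above hinge on one carve-out: PM_QOS_DEFAULT_VALUE (defined as -1 in the kernel) must remain accepted while every other negative value is rejected. The predicate, rendered standalone:

#include <stdbool.h>
#include <stdio.h>

#define PM_QOS_DEFAULT_VALUE (-1)

static bool qos_value_invalid(int value)
{
	return value < 0 && value != PM_QOS_DEFAULT_VALUE;
}

int main(void)
{
	printf("%d %d %d\n",
	       qos_value_invalid(100),			/* 0: a real limit */
	       qos_value_invalid(PM_QOS_DEFAULT_VALUE),	/* 0: "no request" sentinel */
	       qos_value_invalid(-7));			/* 1: rejected */
	return 0;
}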
index 0415d5e..87e9f7e 100644 (file)
@@ -404,6 +404,7 @@ struct bm_position {
        struct mem_zone_bm_rtree *zone;
        struct rtree_node *node;
        unsigned long node_pfn;
+       unsigned long cur_pfn;
        int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
        bm->cur.node = list_entry(bm->cur.zone->leaves.next,
                                  struct rtree_node, list);
        bm->cur.node_pfn = 0;
+       bm->cur.cur_pfn = BM_END_OF_MAP;
        bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ node_found:
        bm->cur.zone = zone;
        bm->cur.node = node;
        bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+       bm->cur.cur_pfn = pfn;
 
        /* Set return values */
        *addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
        clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+       return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
        void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
                if (bit < bits) {
                        pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
                        bm->cur.node_bit = bit + 1;
+                       bm->cur.cur_pfn = pfn;
                        return pfn;
                }
        } while (rtree_next_node(bm));
 
+       bm->cur.cur_pfn = BM_END_OF_MAP;
        return BM_END_OF_MAP;
 }
 
@@ -1423,14 +1433,19 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns true if the page was filled with only zeros,
+ * otherwise false.
  */
-static inline void do_copy_page(long *dst, long *src)
+static inline bool do_copy_page(long *dst, long *src)
 {
+       long z = 0;
        int n;
 
-       for (n = PAGE_SIZE / sizeof(long); n; n--)
+       for (n = PAGE_SIZE / sizeof(long); n; n--) {
+               z |= *src;
                *dst++ = *src++;
+       }
+       return !z;
 }
 
 /**
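The do_copy_page() change detects zero pages essentially for free by OR-ing every copied word into an accumulator and testing it once at the end. A standalone model of the same trick:

#include <stdbool.h>
#include <stdio.h>

#define WORDS 512	/* stand-in for PAGE_SIZE / sizeof(long) */

/* Copy one "page" of words; returns true if the source was all zeros. */
static bool copy_page_words(long *dst, const long *src)
{
	long z = 0;
	int n;

	for (n = 0; n < WORDS; n++) {
		z |= src[n];
		dst[n] = src[n];
	}
	return !z;
}

int main(void)
{
	static long src[WORDS], dst[WORDS];

	printf("all zero: %d\n", copy_page_words(dst, src));	/* 1 */
	src[100] = 42;
	printf("all zero: %d\n", copy_page_words(dst, src));	/* 0 */
	return 0;
}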
@@ -1439,17 +1454,21 @@ static inline void do_copy_page(long *dst, long *src)
  * Check if the page we are going to copy is marked as present in the kernel
  * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
  * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
- * always returns 'true'.
+ * always returns 'true'. Returns true if the page was entirely composed of
+ * zeros, otherwise false.
  */
-static void safe_copy_page(void *dst, struct page *s_page)
+static bool safe_copy_page(void *dst, struct page *s_page)
 {
+       bool zeros_only;
+
        if (kernel_page_present(s_page)) {
-               do_copy_page(dst, page_address(s_page));
+               zeros_only = do_copy_page(dst, page_address(s_page));
        } else {
                hibernate_map_page(s_page);
-               do_copy_page(dst, page_address(s_page));
+               zeros_only = do_copy_page(dst, page_address(s_page));
                hibernate_unmap_page(s_page);
        }
+       return zeros_only;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1459,17 +1478,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn
                saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
        struct page *s_page, *d_page;
        void *src, *dst;
+       bool zeros_only;
 
        s_page = pfn_to_page(src_pfn);
        d_page = pfn_to_page(dst_pfn);
        if (PageHighMem(s_page)) {
                src = kmap_atomic(s_page);
                dst = kmap_atomic(d_page);
-               do_copy_page(dst, src);
+               zeros_only = do_copy_page(dst, src);
                kunmap_atomic(dst);
                kunmap_atomic(src);
        } else {
@@ -1478,30 +1498,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
                         * The page pointed to by src may contain some kernel
                         * data modified by kmap_atomic()
                         */
-                       safe_copy_page(buffer, s_page);
+                       zeros_only = safe_copy_page(buffer, s_page);
                        dst = kmap_atomic(d_page);
                        copy_page(dst, buffer);
                        kunmap_atomic(dst);
                } else {
-                       safe_copy_page(page_address(d_page), s_page);
+                       zeros_only = safe_copy_page(page_address(d_page), s_page);
                }
        }
+       return zeros_only;
 }
 #else
 #define page_is_saveable(zone, pfn)    saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-       safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+       return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
                                pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-                           struct memory_bitmap *orig_bm)
+/*
+ * copy_data_pages() copies each saveable page into a page pulled from
+ * @copy_bm. A page found to be entirely zero-filled is not copied; it is
+ * marked in @zero_bm instead.
+ *
+ * Returns the number of pages actually copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+                           struct memory_bitmap *orig_bm,
+                           struct memory_bitmap *zero_bm)
 {
+       unsigned long copied_pages = 0;
        struct zone *zone;
-       unsigned long pfn;
+       unsigned long pfn, copy_pfn;
 
        for_each_populated_zone(zone) {
                unsigned long max_zone_pfn;
@@ -1514,18 +1543,29 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
        }
        memory_bm_position_reset(orig_bm);
        memory_bm_position_reset(copy_bm);
+       copy_pfn = memory_bm_next_pfn(copy_bm);
        for(;;) {
                pfn = memory_bm_next_pfn(orig_bm);
                if (unlikely(pfn == BM_END_OF_MAP))
                        break;
-               copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+               if (copy_data_page(copy_pfn, pfn)) {
+                       memory_bm_set_bit(zero_bm, pfn);
+                       /* Reuse this copy_pfn for the next page that is not all zeros */
+                       continue;
+               }
+               copied_pages++;
+               copy_pfn = memory_bm_next_pfn(copy_bm);
        }
+       return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1546,6 +1586,9 @@ static struct memory_bitmap orig_bm;
  */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
  * swsusp_free - Free pages allocated for hibernation image.
  *
@@ -1590,6 +1633,7 @@ loop:
 out:
        nr_copy_pages = 0;
        nr_meta_pages = 0;
+       nr_zero_pages = 0;
        restore_pblist = NULL;
        buffer = NULL;
        alloc_normal = 0;
@@ -1808,8 +1852,15 @@ int hibernate_preallocate_memory(void)
                goto err_out;
        }
 
+       error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+       if (error) {
+               pr_err("Cannot allocate zero bitmap\n");
+               goto err_out;
+       }
+
        alloc_normal = 0;
        alloc_highmem = 0;
+       nr_zero_pages = 0;
 
        /* Count the number of saveable data pages. */
        save_highmem = count_highmem_pages();
@@ -2089,19 +2140,19 @@ asmlinkage __visible int swsusp_save(void)
         * Kill them.
         */
        drain_local_pages(NULL);
-       copy_data_pages(&copy_bm, &orig_bm);
+       nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
        /*
         * End of critical section. From now on, we can write to memory,
         * but we should not touch disk. This specially means we must _not_
         * touch swap space! Except we must write out our image of course.
         */
-
        nr_pages += nr_highmem;
-       nr_copy_pages = nr_pages;
+       /* We don't actually copy the zero pages */
+       nr_zero_pages = nr_pages - nr_copy_pages;
        nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-       pr_info("Image created (%d pages copied)\n", nr_pages);
+       pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
        return 0;
 }
@@ -2146,15 +2197,22 @@ static int init_header(struct swsusp_info *info)
        return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
  * pack_pfns - Prepare PFNs for saving.
  * @bm: Memory bitmap.
  * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
  *
  * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages that were filled with only
+ * zeros have the highest bit set in the packed format, which distinguishes
+ * them from PFNs whose page contents are stored in the image file.
  */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+               struct memory_bitmap *zero_bm)
 {
        int j;
 
@@ -2162,6 +2220,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
                buf[j] = memory_bm_next_pfn(bm);
                if (unlikely(buf[j] == BM_END_OF_MAP))
                        break;
+               if (memory_bm_test_bit(zero_bm, buf[j]))
+                       buf[j] |= ENCODED_PFN_ZERO_FLAG;
        }
 }
 
@@ -2203,7 +2263,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
                memory_bm_position_reset(&copy_bm);
        } else if (handle->cur <= nr_meta_pages) {
                clear_page(buffer);
-               pack_pfns(buffer, &orig_bm);
+               pack_pfns(buffer, &orig_bm, &zero_bm);
        } else {
                struct page *page;
 
@@ -2299,24 +2359,35 @@ static int load_header(struct swsusp_info *info)
  * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
  * @bm: Memory bitmap.
  * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
  *
  * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
  */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+               struct memory_bitmap *zero_bm)
 {
+       unsigned long decoded_pfn;
+       bool zero;
        int j;
 
        for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
                if (unlikely(buf[j] == BM_END_OF_MAP))
                        break;
 
-               if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-                       memory_bm_set_bit(bm, buf[j]);
+               zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+               decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+               if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+                       memory_bm_set_bit(bm, decoded_pfn);
+                       if (zero) {
+                               memory_bm_set_bit(zero_bm, decoded_pfn);
+                               nr_zero_pages++;
+                       }
                } else {
-                       if (!pfn_valid(buf[j]))
+                       if (!pfn_valid(decoded_pfn))
                                pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-                                      (unsigned long long)PFN_PHYS(buf[j]));
+                                      (unsigned long long)PFN_PHYS(decoded_pfn));
                        return -EFAULT;
                }
        }
@@ -2538,6 +2609,7 @@ static inline void free_highmem_data(void) {}
  * prepare_image - Make room for loading hibernation image.
  * @new_bm: Uninitialized memory bitmap structure.
  * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
  *
  * Use @bm to mark the pages that will be overwritten in the process of
  * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2548,10 +2620,15 @@ static inline void free_highmem_data(void) {}
  * pages will be used for just yet.  Instead, we mark them all as allocated and
  * create a list of "safe" pages to be used later.  On systems with high
  * memory a list of "safe" highmem pages is created too.
+ *
+ * Because it was not known which pages were unsafe when @zero_bm was created,
+ * make a copy of it and recreate it within safe pages.
  */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+               struct memory_bitmap *zero_bm)
 {
        unsigned int nr_pages, nr_highmem;
+       struct memory_bitmap tmp;
        struct linked_page *lp;
        int error;
 
@@ -2568,6 +2645,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 
        duplicate_memory_bitmap(new_bm, bm);
        memory_bm_free(bm, PG_UNSAFE_KEEP);
+
+       /* Make a copy of zero_bm so it can be created in safe pages */
+       error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+       if (error)
+               goto Free;
+
+       duplicate_memory_bitmap(&tmp, zero_bm);
+       memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+       /* Recreate zero_bm in safe pages */
+       error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+       if (error)
+               goto Free;
+
+       duplicate_memory_bitmap(zero_bm, &tmp);
+       memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+       /* At this point zero_bm is in safe pages and it can be used for restoring. */
+
        if (nr_highmem > 0) {
                error = prepare_highmem_image(bm, &nr_highmem);
                if (error)
@@ -2582,7 +2677,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
         *
         * nr_copy_pages cannot be less than allocated_unsafe_pages too.
         */
-       nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+       nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
        nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
        while (nr_pages > 0) {
                lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2595,7 +2690,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
                nr_pages--;
        }
        /* Preallocate memory for the image */
-       nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+       nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
        while (nr_pages > 0) {
                lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
                if (!lp) {
@@ -2683,8 +2778,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
        static struct chain_allocator ca;
        int error = 0;
 
+next:
        /* Check if we have already loaded the entire image */
-       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
                return 0;
 
        handle->sync_read = 1;
@@ -2709,19 +2805,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
                if (error)
                        return error;
 
+               error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+               if (error)
+                       return error;
+
+               nr_zero_pages = 0;
+
                hibernate_restore_protection_begin();
        } else if (handle->cur <= nr_meta_pages + 1) {
-               error = unpack_orig_pfns(buffer, &copy_bm);
+               error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
                if (error)
                        return error;
 
                if (handle->cur == nr_meta_pages + 1) {
-                       error = prepare_image(&orig_bm, &copy_bm);
+                       error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
                        if (error)
                                return error;
 
                        chain_init(&ca, GFP_ATOMIC, PG_SAFE);
                        memory_bm_position_reset(&orig_bm);
+                       memory_bm_position_reset(&zero_bm);
                        restore_pblist = NULL;
                        handle->buffer = get_buffer(&orig_bm, &ca);
                        handle->sync_read = 0;
@@ -2738,6 +2841,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
                        handle->sync_read = 0;
        }
        handle->cur++;
+
+       /* Zero pages were not included in the image; zero-fill the buffer and move on. */
+       if (handle->cur > nr_meta_pages + 1 &&
+           memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+               memset(handle->buffer, 0, PAGE_SIZE);
+               goto next;
+       }
+
        return PAGE_SIZE;
 }
 
@@ -2754,7 +2865,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
        copy_last_highmem_page();
        hibernate_restore_protect_page(handle->buffer);
        /* Do that only if we have loaded the image entirely */
-       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+       if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
                memory_bm_recycle(&orig_bm);
                free_highmem_data();
        }
@@ -2763,7 +2874,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
        return !(!nr_copy_pages || !last_highmem_page_copied() ||
-                       handle->cur <= nr_meta_pages + nr_copy_pages);
+                       handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages);
 }
 
 #ifdef CONFIG_HIGHMEM
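Summing up the snapshot.c changes: zero-filled pages are dropped from the image and instead flagged in the packed PFN stream by setting the top bit of the stored word, mirroring ENCODED_PFN_ZERO_FLAG/ENCODED_PFN_MASK above. A standalone round-trip of that encoding (macro names shortened here):

#include <stdio.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * 8)
#define ZERO_FLAG	((unsigned long)1 << (BITS_PER_LONG - 1))
#define PFN_MASK	(~ZERO_FLAG)

int main(void)
{
	unsigned long pfn = 0x12345;
	unsigned long packed = pfn | ZERO_FLAG;	/* mark as a zero page */

	printf("zero=%d pfn=0x%lx\n",
	       !!(packed & ZERO_FLAG),		/* 1: page was all zeros */
	       packed & PFN_MASK);		/* 0x12345: the PFN survives */
	return 0;
}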
index 98c1544..5befd87 100644 (file)
@@ -511,6 +511,14 @@ static inline void show_rcu_tasks_gp_kthreads(void) {}
 void rcu_request_urgent_qs_task(struct task_struct *t);
 #endif /* #else #ifdef CONFIG_TINY_RCU */
 
+#ifdef CONFIG_TASKS_RCU
+struct task_struct *get_rcu_tasks_gp_kthread(void);
+#endif // #ifdef CONFIG_TASKS_RCU
+
+#ifdef CONFIG_TASKS_RUDE_RCU
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void);
+#endif // #ifdef CONFIG_TASKS_RUDE_RCU
+
 #define RCU_SCHEDULER_INACTIVE 0
 #define RCU_SCHEDULER_INIT     1
 #define RCU_SCHEDULER_RUNNING  2
index d122173..ffdb304 100644 (file)
@@ -84,15 +84,17 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");
 #endif
 
 torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
-torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
+torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
+torture_param(int, minruntime, 0, "Minimum run time (s)");
 torture_param(int, nreaders, -1, "Number of RCU reader threads");
 torture_param(int, nwriters, -1, "Number of RCU updater threads");
 torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
              "Shutdown at end of scalability tests.");
 torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
 torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
+torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
 torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
 torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
 torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -139,6 +141,7 @@ struct rcu_scale_ops {
        void (*gp_barrier)(void);
        void (*sync)(void);
        void (*exp_sync)(void);
+       struct task_struct *(*rso_gp_kthread)(void);
        const char *name;
 };
 
@@ -295,6 +298,7 @@ static struct rcu_scale_ops tasks_ops = {
        .gp_barrier     = rcu_barrier_tasks,
        .sync           = synchronize_rcu_tasks,
        .exp_sync       = synchronize_rcu_tasks,
+       .rso_gp_kthread = get_rcu_tasks_gp_kthread,
        .name           = "tasks"
 };
 
@@ -306,6 +310,44 @@ static struct rcu_scale_ops tasks_ops = {
 
 #endif // #else // #ifdef CONFIG_TASKS_RCU
 
+#ifdef CONFIG_TASKS_RUDE_RCU
+
+/*
+ * Definitions for RCU-tasks-rude scalability testing.
+ */
+
+static int tasks_rude_scale_read_lock(void)
+{
+       return 0;
+}
+
+static void tasks_rude_scale_read_unlock(int idx)
+{
+}
+
+static struct rcu_scale_ops tasks_rude_ops = {
+       .ptype          = RCU_TASKS_RUDE_FLAVOR,
+       .init           = rcu_sync_scale_init,
+       .readlock       = tasks_rude_scale_read_lock,
+       .readunlock     = tasks_rude_scale_read_unlock,
+       .get_gp_seq     = rcu_no_completed,
+       .gp_diff        = rcu_seq_diff,
+       .async          = call_rcu_tasks_rude,
+       .gp_barrier     = rcu_barrier_tasks_rude,
+       .sync           = synchronize_rcu_tasks_rude,
+       .exp_sync       = synchronize_rcu_tasks_rude,
+       .rso_gp_kthread = get_rcu_tasks_rude_gp_kthread,
+       .name           = "tasks-rude"
+};
+
+#define TASKS_RUDE_OPS &tasks_rude_ops,
+
+#else // #ifdef CONFIG_TASKS_RUDE_RCU
+
+#define TASKS_RUDE_OPS
+
+#endif // #else // #ifdef CONFIG_TASKS_RUDE_RCU
+
 #ifdef CONFIG_TASKS_TRACE_RCU
 
 /*
@@ -334,6 +376,7 @@ static struct rcu_scale_ops tasks_tracing_ops = {
        .gp_barrier     = rcu_barrier_tasks_trace,
        .sync           = synchronize_rcu_tasks_trace,
        .exp_sync       = synchronize_rcu_tasks_trace,
+       .rso_gp_kthread = get_rcu_tasks_trace_gp_kthread,
        .name           = "tasks-tracing"
 };
 
@@ -410,10 +453,12 @@ rcu_scale_writer(void *arg)
 {
        int i = 0;
        int i_max;
+       unsigned long jdone;
        long me = (long)arg;
        struct rcu_head *rhp = NULL;
        bool started = false, done = false, alldone = false;
        u64 t;
+       DEFINE_TORTURE_RANDOM(tr);
        u64 *wdp;
        u64 *wdpp = writer_durations[me];
 
@@ -424,7 +469,7 @@ rcu_scale_writer(void *arg)
        sched_set_fifo_low(current);
 
        if (holdoff)
-               schedule_timeout_uninterruptible(holdoff * HZ);
+               schedule_timeout_idle(holdoff * HZ);
 
        /*
         * Wait until rcu_end_inkernel_boot() is called for normal GP tests
@@ -445,9 +490,12 @@ rcu_scale_writer(void *arg)
                }
        }
 
+       jdone = jiffies + minruntime * HZ;
        do {
                if (writer_holdoff)
                        udelay(writer_holdoff);
+               if (writer_holdoff_jiffies)
+                       schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
                wdp = &wdpp[i];
                *wdp = ktime_get_mono_fast_ns();
                if (gp_async) {
@@ -475,7 +523,7 @@ retry:
                if (!started &&
                    atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
                        started = true;
-               if (!done && i >= MIN_MEAS) {
+               if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
                        done = true;
                        sched_set_normal(current, 0);
                        pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
@@ -518,8 +566,8 @@ static void
 rcu_scale_print_module_parms(struct rcu_scale_ops *cur_ops, const char *tag)
 {
        pr_alert("%s" SCALE_FLAG
-                "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
-                scale_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
+                "--- %s: gp_async=%d gp_async_max=%d gp_exp=%d holdoff=%d minruntime=%d nreaders=%d nwriters=%d writer_holdoff=%d writer_holdoff_jiffies=%d verbose=%d shutdown=%d\n",
+                scale_type, tag, gp_async, gp_async_max, gp_exp, holdoff, minruntime, nrealreaders, nrealwriters, writer_holdoff, writer_holdoff_jiffies, verbose, shutdown);
 }
 
 /*
@@ -556,6 +604,8 @@ static struct task_struct **kfree_reader_tasks;
 static int kfree_nrealthreads;
 static atomic_t n_kfree_scale_thread_started;
 static atomic_t n_kfree_scale_thread_ended;
+static struct task_struct *kthread_tp;
+static u64 kthread_stime;
 
 struct kfree_obj {
        char kfree_obj[8];
@@ -701,6 +751,10 @@ kfree_scale_init(void)
        unsigned long jif_start;
        unsigned long orig_jif;
 
+       pr_alert("%s" SCALE_FLAG
+                "--- kfree_rcu_test: kfree_mult=%d kfree_by_call_rcu=%d kfree_nthreads=%d kfree_alloc_num=%d kfree_loops=%d kfree_rcu_test_double=%d kfree_rcu_test_single=%d\n",
+                scale_type, kfree_mult, kfree_by_call_rcu, kfree_nthreads, kfree_alloc_num, kfree_loops, kfree_rcu_test_double, kfree_rcu_test_single);
+
        // Also, do a quick self-test to ensure laziness is as much as
        // expected.
        if (kfree_by_call_rcu && !IS_ENABLED(CONFIG_RCU_LAZY)) {
@@ -797,6 +851,18 @@ rcu_scale_cleanup(void)
        if (gp_exp && gp_async)
                SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
 
+       // If built-in, just report all of the GP kthread's CPU time.
+       if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
+               kthread_tp = cur_ops->rso_gp_kthread();
+       if (kthread_tp) {
+               u32 ns;
+               u64 us;
+
+               kthread_stime = kthread_tp->stime - kthread_stime;
+               us = div_u64_rem(kthread_stime, 1000, &ns);
+               pr_info("rcu_scale: Grace-period kthread CPU time: %llu.%03u us\n", us, ns);
+               show_rcu_gp_kthreads();
+       }
        if (kfree_rcu_test) {
                kfree_scale_cleanup();
                return;
@@ -885,7 +951,7 @@ rcu_scale_init(void)
        long i;
        int firsterr = 0;
        static struct rcu_scale_ops *scale_ops[] = {
-               &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_TRACING_OPS
+               &rcu_ops, &srcu_ops, &srcud_ops, TASKS_OPS TASKS_RUDE_OPS TASKS_TRACING_OPS
        };
 
        if (!torture_init_begin(scale_type, verbose))
@@ -910,6 +976,11 @@ rcu_scale_init(void)
        if (cur_ops->init)
                cur_ops->init();
 
+       if (cur_ops->rso_gp_kthread) {
+               kthread_tp = cur_ops->rso_gp_kthread();
+               if (kthread_tp)
+                       kthread_stime = kthread_tp->stime;
+       }
        if (kfree_rcu_test)
                return kfree_scale_init();
 
index 147551c..ade42d6 100644 (file)
@@ -1581,6 +1581,7 @@ rcu_torture_writer(void *arg)
                                    rcu_access_pointer(rcu_torture_current) !=
                                    &rcu_tortures[i]) {
                                        tracing_off();
+                                       show_rcu_gp_kthreads();
                                        WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count);
                                        rcu_ftrace_dump(DUMP_ALL);
                                }
@@ -1876,7 +1877,7 @@ static int
 rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
 {
        int mask = rcutorture_extend_mask_max();
-       unsigned long randmask1 = torture_random(trsp) >> 8;
+       unsigned long randmask1 = torture_random(trsp);
        unsigned long randmask2 = randmask1 >> 3;
        unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED;
        unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ;
@@ -1935,7 +1936,7 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
        if (!((mask - 1) & mask))
                return rtrsp;  /* Current RCU reader not extendable. */
        /* Bias towards larger numbers of loops. */
-       i = (torture_random(trsp) >> 3);
+       i = torture_random(trsp);
        i = ((i | (i >> 3)) & RCUTORTURE_RDR_MAX_LOOPS) + 1;
        for (j = 0; j < i; j++) {
                mask = rcutorture_extend_mask(*readstate, trsp);
@@ -2136,7 +2137,7 @@ static int rcu_nocb_toggle(void *arg)
                toggle_fuzz = NSEC_PER_USEC;
        do {
                r = torture_random(&rand);
-               cpu = (r >> 4) % (maxcpu + 1);
+               cpu = (r >> 1) % (maxcpu + 1);
                if (r & 0x1) {
                        rcu_nocb_cpu_offload(cpu);
                        atomic_long_inc(&n_nocb_offload);
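The rcu_nocb_toggle() tweak partitions a single random draw so that the offload/deoffload decision (bit 0) and the CPU selection (the bits above it) no longer reuse the same low bits. A standalone sketch:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long r = (unsigned long)random();	/* one draw... */
	int maxcpu = 7;					/* sample highest CPU id */
	int cpu = (int)((r >> 1) % (maxcpu + 1));	/* ...bits above bit 0 pick a CPU */

	printf("cpu=%d action=%s\n", cpu,
	       (r & 0x1) ? "offload" : "deoffload");	/* bit 0 picks the action */
	return 0;
}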
index 1970ce5..91a0fd0 100644 (file)
@@ -528,6 +528,38 @@ static struct ref_scale_ops clock_ops = {
        .name           = "clock"
 };
 
+static void ref_jiffies_section(const int nloops)
+{
+       u64 x = 0;
+       int i;
+
+       preempt_disable();
+       for (i = nloops; i >= 0; i--)
+               x += jiffies;
+       preempt_enable();
+       stopopts = x;
+}
+
+static void ref_jiffies_delay_section(const int nloops, const int udl, const int ndl)
+{
+       u64 x = 0;
+       int i;
+
+       preempt_disable();
+       for (i = nloops; i >= 0; i--) {
+               x += jiffies;
+               un_delay(udl, ndl);
+       }
+       preempt_enable();
+       stopopts = x;
+}
+
+static struct ref_scale_ops jiffies_ops = {
+       .readsection    = ref_jiffies_section,
+       .delaysection   = ref_jiffies_delay_section,
+       .name           = "jiffies"
+};
+
 ////////////////////////////////////////////////////////////////////////
 //
 // Methods leveraging SLAB_TYPESAFE_BY_RCU.
@@ -1047,7 +1079,7 @@ ref_scale_init(void)
        int firsterr = 0;
        static struct ref_scale_ops *scale_ops[] = {
                &rcu_ops, &srcu_ops, RCU_TRACE_OPS RCU_TASKS_OPS &refcnt_ops, &rwlock_ops,
-               &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops,
+               &rwsem_ops, &lock_ops, &lock_irq_ops, &acqrel_ops, &clock_ops, &jiffies_ops,
                &typesafe_ref_ops, &typesafe_lock_ops, &typesafe_seqlock_ops,
        };
 
@@ -1107,12 +1139,11 @@ ref_scale_init(void)
        VERBOSE_SCALEOUT("Starting %d reader threads", nreaders);
 
        for (i = 0; i < nreaders; i++) {
+               init_waitqueue_head(&reader_tasks[i].wq);
                firsterr = torture_create_kthread(ref_scale_reader, (void *)i,
                                                  reader_tasks[i].task);
                if (torture_init_error(firsterr))
                        goto unwind;
-
-               init_waitqueue_head(&(reader_tasks[i].wq));
        }
 
        // Main Task
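The refscale hunk is purely an ordering fix: the per-reader waitqueue must be initialized before the reader kthread that will sleep on it is created. A userspace analogue of the same rule, using a pthread mutex (illustrative only):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock;

static void *reader(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);	/* safe only because init happened first */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_init(&lock, NULL);	/* initialize BEFORE creating the user */
	pthread_create(&t, NULL, reader, NULL);
	pthread_join(t, NULL);
	printf("ok\n");
	return 0;
}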
index b770add..8d65f7d 100644 (file)
@@ -25,6 +25,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
  * @cblist: Callback list.
  * @lock: Lock protecting per-CPU callback list.
  * @rtp_jiffies: Jiffies counter value for statistics.
+ * @lazy_timer: Timer to unlazify callbacks.
+ * @urgent_gp: Number of additional non-lazy grace periods.
  * @rtp_n_lock_retries: Rough lock-contention statistic.
  * @rtp_work: Work queue for invoking callbacks.
  * @rtp_irq_work: IRQ work queue for deferred wakeups.
@@ -38,6 +40,8 @@ struct rcu_tasks_percpu {
        raw_spinlock_t __private lock;
        unsigned long rtp_jiffies;
        unsigned long rtp_n_lock_retries;
+       struct timer_list lazy_timer;
+       unsigned int urgent_gp;
        struct work_struct rtp_work;
        struct irq_work rtp_irq_work;
        struct rcu_head barrier_q_head;
@@ -51,7 +55,6 @@ struct rcu_tasks_percpu {
  * @cbs_wait: RCU wait allowing a new callback to get kthread's attention.
  * @cbs_gbl_lock: Lock protecting callback list.
  * @tasks_gp_mutex: Mutex protecting grace period, needed during mid-boot dead zone.
- * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
  * @gp_func: This flavor's grace-period-wait function.
  * @gp_state: Grace period's most recent state transition (debugging).
  * @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
@@ -61,6 +64,8 @@ struct rcu_tasks_percpu {
  * @tasks_gp_seq: Number of grace periods completed since boot.
  * @n_ipis: Number of IPIs sent to encourage grace periods to end.
  * @n_ipis_fails: Number of IPI-send failures.
+ * @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
+ * @lazy_jiffies: Number of jiffies to allow callbacks to be lazy.
  * @pregp_func: This flavor's pre-grace-period function (optional).
  * @pertask_func: This flavor's per-task scan function (optional).
  * @postscan_func: This flavor's post-task scan function (optional).
@@ -92,6 +97,7 @@ struct rcu_tasks {
        unsigned long n_ipis;
        unsigned long n_ipis_fails;
        struct task_struct *kthread_ptr;
+       unsigned long lazy_jiffies;
        rcu_tasks_gp_func_t gp_func;
        pregp_func_t pregp_func;
        pertask_func_t pertask_func;
@@ -127,6 +133,7 @@ static struct rcu_tasks rt_name =                                                   \
        .gp_func = gp,                                                                  \
        .call_func = call,                                                              \
        .rtpcpu = &rt_name ## __percpu,                                                 \
+       .lazy_jiffies = DIV_ROUND_UP(HZ, 4),                                            \
        .name = n,                                                                      \
        .percpu_enqueue_shift = order_base_2(CONFIG_NR_CPUS),                           \
        .percpu_enqueue_lim = 1,                                                        \
@@ -139,9 +146,7 @@ static struct rcu_tasks rt_name =                                                   \
 #ifdef CONFIG_TASKS_RCU
 /* Track exiting tasks in order to allow them to be waited for. */
 DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
-#endif
 
-#ifdef CONFIG_TASKS_RCU
 /* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */
 static void tasks_rcu_exit_srcu_stall(struct timer_list *unused);
 static DEFINE_TIMER(tasks_rcu_exit_srcu_stall_timer, tasks_rcu_exit_srcu_stall);
@@ -171,6 +176,8 @@ static int rcu_task_contend_lim __read_mostly = 100;
 module_param(rcu_task_contend_lim, int, 0444);
 static int rcu_task_collapse_lim __read_mostly = 10;
 module_param(rcu_task_collapse_lim, int, 0444);
+static int rcu_task_lazy_lim __read_mostly = 32;
+module_param(rcu_task_lazy_lim, int, 0444);
 
 /* RCU tasks grace-period state for debugging. */
 #define RTGS_INIT               0
@@ -229,7 +236,7 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp)
 #endif /* #ifndef CONFIG_TINY_RCU */
 
 // Initialize per-CPU callback lists for the specified flavor of
-// Tasks RCU.
+// Tasks RCU.  Do not enqueue callbacks before this function is invoked.
 static void cblist_init_generic(struct rcu_tasks *rtp)
 {
        int cpu;
@@ -237,7 +244,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
        int lim;
        int shift;
 
-       raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
        if (rcu_task_enqueue_lim < 0) {
                rcu_task_enqueue_lim = 1;
                rcu_task_cb_adjust = true;
@@ -260,22 +266,48 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
                WARN_ON_ONCE(!rtpcp);
                if (cpu)
                        raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock));
-               raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+               local_irq_save(flags);  // serialize initialization
                if (rcu_segcblist_empty(&rtpcp->cblist))
                        rcu_segcblist_init(&rtpcp->cblist);
+               local_irq_restore(flags);
                INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq);
                rtpcp->cpu = cpu;
                rtpcp->rtpp = rtp;
                if (!rtpcp->rtp_blkd_tasks.next)
                        INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks);
-               raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
        }
-       raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
 
        pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name,
                        data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim), rcu_task_cb_adjust);
 }
 
+// Compute wakeup time for lazy callback timer.
+static unsigned long rcu_tasks_lazy_time(struct rcu_tasks *rtp)
+{
+       return jiffies + rtp->lazy_jiffies;
+}
+
+// Timer handler that unlazifies lazy callbacks.
+static void call_rcu_tasks_generic_timer(struct timer_list *tlp)
+{
+       unsigned long flags;
+       bool needwake = false;
+       struct rcu_tasks *rtp;
+       struct rcu_tasks_percpu *rtpcp = from_timer(rtpcp, tlp, lazy_timer);
+
+       rtp = rtpcp->rtpp;
+       raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
+       if (!rcu_segcblist_empty(&rtpcp->cblist) && rtp->lazy_jiffies) {
+               if (!rtpcp->urgent_gp)
+                       rtpcp->urgent_gp = 1;
+               needwake = true;
+               mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
+       }
+       raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+       if (needwake)
+               rcuwait_wake_up(&rtp->cbs_wait);
+}
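+
+/*
+ * Hedged usage sketch (hypothetical struct foo, not taken from this
+ * patch): callers need no changes to benefit from laziness; a plain
+ * enqueue may now simply wait up to ->lazy_jiffies, or until
+ * rcu_task_lazy_lim callbacks pile up on this CPU, before the GP
+ * kthread is kicked:
+ *
+ *	static void foo_free_cb(struct rcu_head *rhp)
+ *	{
+ *		kfree(container_of(rhp, struct foo, rh));
+ *	}
+ *
+ *	call_rcu_tasks(&fp->rh, foo_free_cb);	// may be batched lazily
+ */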
+
 // IRQ-work handler that does deferred wakeup for call_rcu_tasks_generic().
 static void call_rcu_tasks_iw_wakeup(struct irq_work *iwp)
 {
@@ -292,6 +324,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
 {
        int chosen_cpu;
        unsigned long flags;
+       bool havekthread = smp_load_acquire(&rtp->kthread_ptr);
        int ideal_cpu;
        unsigned long j;
        bool needadjust = false;
@@ -316,12 +349,19 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func,
                    READ_ONCE(rtp->percpu_enqueue_lim) != nr_cpu_ids)
                        needadjust = true;  // Defer adjustment to avoid deadlock.
        }
-       if (!rcu_segcblist_is_enabled(&rtpcp->cblist)) {
-               raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
-               cblist_init_generic(rtp);
-               raw_spin_lock_rcu_node(rtpcp); // irqs already disabled.
+       // Queuing callbacks before initialization is not yet supported.
+       if (WARN_ON_ONCE(!rcu_segcblist_is_enabled(&rtpcp->cblist)))
+               rcu_segcblist_init(&rtpcp->cblist);
+       needwake = (func == wakeme_after_rcu) ||
+                  (rcu_segcblist_n_cbs(&rtpcp->cblist) == rcu_task_lazy_lim);
+       if (havekthread && !needwake && !timer_pending(&rtpcp->lazy_timer)) {
+               if (rtp->lazy_jiffies)
+                       mod_timer(&rtpcp->lazy_timer, rcu_tasks_lazy_time(rtp));
+               else
+                       needwake = rcu_segcblist_empty(&rtpcp->cblist);
        }
-       needwake = rcu_segcblist_empty(&rtpcp->cblist);
+       if (needwake)
+               rtpcp->urgent_gp = 3;
        rcu_segcblist_enqueue(&rtpcp->cblist, rhp);
        raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
        if (unlikely(needadjust)) {
@@ -415,9 +455,14 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp)
                }
                rcu_segcblist_advance(&rtpcp->cblist, rcu_seq_current(&rtp->tasks_gp_seq));
                (void)rcu_segcblist_accelerate(&rtpcp->cblist, rcu_seq_snap(&rtp->tasks_gp_seq));
-               if (rcu_segcblist_pend_cbs(&rtpcp->cblist))
+               if (rtpcp->urgent_gp > 0 && rcu_segcblist_pend_cbs(&rtpcp->cblist)) {
+                       if (rtp->lazy_jiffies)
+                               rtpcp->urgent_gp--;
                        needgpcb |= 0x3;
-               if (!rcu_segcblist_empty(&rtpcp->cblist))
+               } else if (rcu_segcblist_empty(&rtpcp->cblist)) {
+                       rtpcp->urgent_gp = 0;
+               }
+               if (rcu_segcblist_ready_cbs(&rtpcp->cblist))
                        needgpcb |= 0x1;
                raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
        }
@@ -525,10 +570,12 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
        if (unlikely(midboot)) {
                needgpcb = 0x2;
        } else {
+               mutex_unlock(&rtp->tasks_gp_mutex);
                set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
                rcuwait_wait_event(&rtp->cbs_wait,
                                   (needgpcb = rcu_tasks_need_gpcb(rtp)),
                                   TASK_IDLE);
+               mutex_lock(&rtp->tasks_gp_mutex);
        }
 
        if (needgpcb & 0x2) {
@@ -549,11 +596,19 @@ static void rcu_tasks_one_gp(struct rcu_tasks *rtp, bool midboot)
 // RCU-tasks kthread that detects grace periods and invokes callbacks.
 static int __noreturn rcu_tasks_kthread(void *arg)
 {
+       int cpu;
        struct rcu_tasks *rtp = arg;
 
+       for_each_possible_cpu(cpu) {
+               struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
+
+               timer_setup(&rtpcp->lazy_timer, call_rcu_tasks_generic_timer, 0);
+               rtpcp->urgent_gp = 1;
+       }
+
        /* Run on housekeeping CPUs by default.  Sysadm can move if desired. */
        housekeeping_affine(current, HK_TYPE_RCU);
-       WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
+       smp_store_release(&rtp->kthread_ptr, current); // Let GPs start!
 
        /*
         * Each pass through the following loop makes one check for
@@ -635,16 +690,22 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
 {
        int cpu;
        bool havecbs = false;
+       bool haveurgent = false;
+       bool haveurgentcbs = false;
 
        for_each_possible_cpu(cpu) {
                struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rtp->rtpcpu, cpu);
 
-               if (!data_race(rcu_segcblist_empty(&rtpcp->cblist))) {
+               if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)))
                        havecbs = true;
+               if (data_race(rtpcp->urgent_gp))
+                       haveurgent = true;
+               if (!data_race(rcu_segcblist_empty(&rtpcp->cblist)) && data_race(rtpcp->urgent_gp))
+                       haveurgentcbs = true;
+               if (havecbs && haveurgent && haveurgentcbs)
                        break;
-               }
        }
-       pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c %s\n",
+       pr_info("%s: %s(%d) since %lu g:%lu i:%lu/%lu %c%c%c%c l:%lu %s\n",
                rtp->kname,
                tasks_gp_state_getname(rtp), data_race(rtp->gp_state),
                jiffies - data_race(rtp->gp_jiffies),
@@ -652,6 +713,9 @@ static void show_rcu_tasks_generic_gp_kthread(struct rcu_tasks *rtp, char *s)
                data_race(rtp->n_ipis_fails), data_race(rtp->n_ipis),
                ".k"[!!data_race(rtp->kthread_ptr)],
                ".C"[havecbs],
+               ".u"[haveurgent],
+               ".U"[haveurgentcbs],
+               rtp->lazy_jiffies,
                s);
 }
 #endif // #ifndef CONFIG_TINY_RCU
@@ -1020,11 +1084,16 @@ void rcu_barrier_tasks(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
 
+int rcu_tasks_lazy_ms = -1;
+module_param(rcu_tasks_lazy_ms, int, 0444);
+
 static int __init rcu_spawn_tasks_kthread(void)
 {
        cblist_init_generic(&rcu_tasks);
        rcu_tasks.gp_sleep = HZ / 10;
        rcu_tasks.init_fract = HZ / 10;
+       if (rcu_tasks_lazy_ms >= 0)
+               rcu_tasks.lazy_jiffies = msecs_to_jiffies(rcu_tasks_lazy_ms);
        rcu_tasks.pregp_func = rcu_tasks_pregp_step;
        rcu_tasks.pertask_func = rcu_tasks_pertask;
        rcu_tasks.postscan_func = rcu_tasks_postscan;
@@ -1042,6 +1111,12 @@ void show_rcu_tasks_classic_gp_kthread(void)
 EXPORT_SYMBOL_GPL(show_rcu_tasks_classic_gp_kthread);
 #endif // !defined(CONFIG_TINY_RCU)
 
+struct task_struct *get_rcu_tasks_gp_kthread(void)
+{
+       return rcu_tasks.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread);
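+
+/*
+ * Illustrative (assumed) consumer of the new accessor, e.g. a torture
+ * or debug facility dumping the flavor's grace-period kthread:
+ *
+ *	struct task_struct *t = get_rcu_tasks_gp_kthread();
+ *
+ *	if (t)
+ *		sched_show_task(t);
+ */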
+
 /*
  * Contribute to protect against tasklist scan blind spot while the
  * task is exiting and may be removed from the tasklist. See
@@ -1173,10 +1248,15 @@ void rcu_barrier_tasks_rude(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
 
+int rcu_tasks_rude_lazy_ms = -1;
+module_param(rcu_tasks_rude_lazy_ms, int, 0444);
+
 static int __init rcu_spawn_tasks_rude_kthread(void)
 {
        cblist_init_generic(&rcu_tasks_rude);
        rcu_tasks_rude.gp_sleep = HZ / 10;
+       if (rcu_tasks_rude_lazy_ms >= 0)
+               rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms);
        rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
        return 0;
 }
@@ -1188,6 +1268,13 @@ void show_rcu_tasks_rude_gp_kthread(void)
 }
 EXPORT_SYMBOL_GPL(show_rcu_tasks_rude_gp_kthread);
 #endif // !defined(CONFIG_TINY_RCU)
+
+struct task_struct *get_rcu_tasks_rude_gp_kthread(void)
+{
+       return rcu_tasks_rude.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_rude_gp_kthread);
+
 #endif /* #ifdef CONFIG_TASKS_RUDE_RCU */
 
 ////////////////////////////////////////////////////////////////////////
@@ -1793,6 +1880,9 @@ void rcu_barrier_tasks_trace(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
 
+int rcu_tasks_trace_lazy_ms = -1;
+module_param(rcu_tasks_trace_lazy_ms, int, 0444);
+
 static int __init rcu_spawn_tasks_trace_kthread(void)
 {
        cblist_init_generic(&rcu_tasks_trace);
@@ -1807,6 +1897,8 @@ static int __init rcu_spawn_tasks_trace_kthread(void)
                if (rcu_tasks_trace.init_fract <= 0)
                        rcu_tasks_trace.init_fract = 1;
        }
+       if (rcu_tasks_trace_lazy_ms >= 0)
+               rcu_tasks_trace.lazy_jiffies = msecs_to_jiffies(rcu_tasks_trace_lazy_ms);
        rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
        rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
        rcu_tasks_trace.holdouts_func = check_all_holdout_tasks_trace;
@@ -1830,6 +1922,12 @@ void show_rcu_tasks_trace_gp_kthread(void)
 EXPORT_SYMBOL_GPL(show_rcu_tasks_trace_gp_kthread);
 #endif // !defined(CONFIG_TINY_RCU)
 
+struct task_struct *get_rcu_tasks_trace_gp_kthread(void)
+{
+       return rcu_tasks_trace.kthread_ptr;
+}
+EXPORT_SYMBOL_GPL(get_rcu_tasks_trace_gp_kthread);
+
 #else /* #ifdef CONFIG_TASKS_TRACE_RCU */
 static void exit_tasks_rcu_finish_trace(struct task_struct *t) { }
 #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
index 1449cb6..cb1caef 100644 (file)
@@ -632,7 +632,7 @@ void __rcu_irq_enter_check_tick(void)
        // prevents self-deadlock.  So we can safely recheck under the lock.
        // Note that the nohz_full state currently cannot change.
        raw_spin_lock_rcu_node(rdp->mynode);
-       if (rdp->rcu_urgent_qs && !rdp->rcu_forced_tick) {
+       if (READ_ONCE(rdp->rcu_urgent_qs) && !rdp->rcu_forced_tick) {
                // A nohz_full CPU is in the kernel and RCU needs a
                // quiescent state.  Turn on the tick!
                WRITE_ONCE(rdp->rcu_forced_tick, true);
@@ -677,12 +677,16 @@ static void rcu_disable_urgency_upon_qs(struct rcu_data *rdp)
 }
 
 /**
- * rcu_is_watching - see if RCU thinks that the current CPU is not idle
+ * rcu_is_watching - RCU read-side critical sections permitted on current CPU?
  *
- * Return true if RCU is watching the running CPU, which means that this
- * CPU can safely enter RCU read-side critical sections.  In other words,
- * if the current CPU is not in its idle loop or is in an interrupt or
- * NMI handler, return true.
+ * Return @true if RCU is watching the running CPU and @false otherwise.
+ * A @true return means that this CPU can safely enter RCU read-side
+ * critical sections.
+ *
+ * Although calls to rcu_is_watching() from most parts of the kernel
+ * will return @true, there are important exceptions.  For example, if the
+ * current CPU is deep within its idle loop, in kernel entry/exit code,
+ * or offline, rcu_is_watching() will return @false.
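+ *
+ * A typical (illustrative) caller is tracing or debug code that may run
+ * from such contexts and bails out first:
+ *
+ *	if (!rcu_is_watching())
+ *		return;
+ *	rcu_read_lock();
+ *	...
+ *	rcu_read_unlock();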
  *
  * Make notrace because it can be called by the internal functions of
  * ftrace, and making this notrace removes unnecessary recursion calls.
index 43229d2..5598212 100644 (file)
@@ -77,9 +77,9 @@ __setup("rcu_nocbs", rcu_nocb_setup);
 static int __init parse_rcu_nocb_poll(char *arg)
 {
        rcu_nocb_poll = true;
-       return 0;
+       return 1;
 }
-early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
+__setup("rcu_nocb_poll", parse_rcu_nocb_poll);
 
 /*
  * Don't bother bypassing ->cblist if the call_rcu() rate is low.
index 5d113aa..59032aa 100644 (file)
@@ -171,7 +171,8 @@ static void scf_torture_stats_print(void)
                scfs.n_all_wait += scf_stats_p[i].n_all_wait;
        }
        if (atomic_read(&n_errs) || atomic_read(&n_mb_in_errs) ||
-           atomic_read(&n_mb_out_errs) || atomic_read(&n_alloc_errs))
+           atomic_read(&n_mb_out_errs) ||
+           (!IS_ENABLED(CONFIG_KASAN) && atomic_read(&n_alloc_errs)))
                bangstr = "!!! ";
        pr_alert("%s %sscf_invoked_count %s: %lld resched: %lld single: %lld/%lld single_ofl: %lld/%lld single_rpc: %lld single_rpc_ofl: %lld many: %lld/%lld all: %lld/%lld ",
                 SCFTORT_FLAG, bangstr, isdone ? "VER" : "ver", invoked_count, scfs.n_resched,
@@ -312,6 +313,7 @@ static void scf_handler_1(void *scfc_in)
 // Randomly do an smp_call_function*() invocation.
 static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_random_state *trsp)
 {
+       bool allocfail = false;
        uintptr_t cpu;
        int ret = 0;
        struct scf_check *scfcp = NULL;
@@ -323,8 +325,10 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
                preempt_disable();
        if (scfsp->scfs_prim == SCF_PRIM_SINGLE || scfsp->scfs_wait) {
                scfcp = kmalloc(sizeof(*scfcp), GFP_ATOMIC);
-               if (WARN_ON_ONCE(!scfcp)) {
+               if (!scfcp) {
+                       WARN_ON_ONCE(!IS_ENABLED(CONFIG_KASAN));
                        atomic_inc(&n_alloc_errs);
+                       allocfail = true;
                } else {
                        scfcp->scfc_cpu = -1;
                        scfcp->scfc_wait = scfsp->scfs_wait;
@@ -431,7 +435,9 @@ static void scftorture_invoke_one(struct scf_statistics *scfp, struct torture_ra
                cpus_read_unlock();
        else
                preempt_enable();
-       if (!(torture_random(trsp) & 0xfff))
+       if (allocfail)
+               schedule_timeout_idle((1 + longwait) * HZ);  // Let no-wait handlers complete.
+       else if (!(torture_random(trsp) & 0xfff))
                schedule_timeout_uninterruptible(1);
 }
 
index d57a5c1..3561ab5 100644 (file)
  * Waiting for completion is typically a sync point, but not an exclusion point.
  */
 
+static void complete_with_flags(struct completion *x, int wake_flags)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&x->wait.lock, flags);
+
+       if (x->done != UINT_MAX)
+               x->done++;
+       swake_up_locked(&x->wait, wake_flags);
+       raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void complete_on_current_cpu(struct completion *x)
+{
+       return complete_with_flags(x, WF_CURRENT_CPU);
+}
+
 /**
  * complete: - signals a single thread waiting on this completion
  * @x:  holds the state of this particular completion
  */
 void complete(struct completion *x)
 {
-       unsigned long flags;
-
-       raw_spin_lock_irqsave(&x->wait.lock, flags);
-
-       if (x->done != UINT_MAX)
-               x->done++;
-       swake_up_locked(&x->wait);
-       raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+       complete_with_flags(x, 0);
 }
 EXPORT_SYMBOL(complete);
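+
+/*
+ * Sketch of intended use (assumed waiter/completer pairing): the
+ * completing side opts in to waking the waiter on this CPU, e.g.:
+ *
+ *	DECLARE_COMPLETION_ONSTACK(done);
+ *	...hand &done to the worker, then wait_for_completion(&done)...
+ *
+ * worker:
+ *	complete_on_current_cpu(&done);	// wake with WF_CURRENT_CPU
+ *
+ * WF_CURRENT_CPU hints the scheduler to place the woken waiter on the
+ * completing CPU, which can keep shared data cache-hot.
+ */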
 
index c52c2eb..2299a5c 100644 (file)
@@ -1097,25 +1097,22 @@ int get_nohz_timer_target(void)
 
        hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
 
-       rcu_read_lock();
+       guard(rcu)();
+
        for_each_domain(cpu, sd) {
                for_each_cpu_and(i, sched_domain_span(sd), hk_mask) {
                        if (cpu == i)
                                continue;
 
-                       if (!idle_cpu(i)) {
-                               cpu = i;
-                               goto unlock;
-                       }
+                       if (!idle_cpu(i))
+                               return i;
                }
        }
 
        if (default_cpu == -1)
                default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
-       cpu = default_cpu;
-unlock:
-       rcu_read_unlock();
-       return cpu;
+
+       return default_cpu;
 }
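+
+/*
+ * guard(rcu)() comes from the scope-based cleanup helpers in
+ * <linux/cleanup.h>: rcu_read_lock() is taken at the declaration and
+ * rcu_read_unlock() runs automatically on every exit path, which is why
+ * the early 'return i' above needs no unlock.  Minimal sketch of the
+ * same shape (hypothetical data):
+ *
+ *	guard(rcu)();
+ *	list_for_each_entry_rcu(pos, &head, node)
+ *		if (match(pos))
+ *			return pos->val;	// unlocks on return
+ */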
 
 /*
@@ -1194,6 +1191,20 @@ static void nohz_csd_func(void *info)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
+static inline bool __need_bw_check(struct rq *rq, struct task_struct *p)
+{
+       if (rq->nr_running != 1)
+               return false;
+
+       if (p->sched_class != &fair_sched_class)
+               return false;
+
+       if (!task_on_rq_queued(p))
+               return false;
+
+       return true;
+}
+
 bool sched_can_stop_tick(struct rq *rq)
 {
        int fifo_nr_running;
@@ -1229,6 +1240,18 @@ bool sched_can_stop_tick(struct rq *rq)
        if (rq->nr_running > 1)
                return false;
 
+       /*
+        * If there is one task and it has CFS runtime bandwidth
+        * constraints and it's running on the CPU now, we don't want to
+        * stop the tick.  This check prevents clearing the bandwidth bit
+        * if a task newly enqueued here is dequeued by migrating away
+        * while the constrained task continues to run, e.g. going from
+        * 2->1 without going through pick_next_task().
+        */
+       if (sched_feat(HZ_BW) && __need_bw_check(rq, rq->curr)) {
+               if (cfs_task_bw_constrained(rq->curr))
+                       return false;
+       }
+
        return true;
 }
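+
+/*
+ * Illustrative failure mode the HZ_BW check avoids (assumed cgroup
+ * setup): a lone CFS task in a group with cpu.max "50000 100000" keeps
+ * rq->nr_running == 1, so the tick would otherwise stop; without the
+ * tick, the 50ms-per-100ms quota is never enforced and the task runs
+ * unthrottled until the CPU is disturbed by something else.
+ */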
 #endif /* CONFIG_NO_HZ_FULL */
@@ -1804,7 +1827,8 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
        int old_min, old_max, old_min_rt;
        int result;
 
-       mutex_lock(&uclamp_mutex);
+       guard(mutex)(&uclamp_mutex);
+
        old_min = sysctl_sched_uclamp_util_min;
        old_max = sysctl_sched_uclamp_util_max;
        old_min_rt = sysctl_sched_uclamp_util_min_rt_default;
@@ -1813,7 +1837,7 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
        if (result)
                goto undo;
        if (!write)
-               goto done;
+               return 0;
 
        if (sysctl_sched_uclamp_util_min > sysctl_sched_uclamp_util_max ||
            sysctl_sched_uclamp_util_max > SCHED_CAPACITY_SCALE ||
@@ -1849,16 +1873,12 @@ static int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
         * Otherwise, keep it simple and do just a lazy update at each next
         * task enqueue time.
         */
-
-       goto done;
+       return 0;
 
 undo:
        sysctl_sched_uclamp_util_min = old_min;
        sysctl_sched_uclamp_util_max = old_max;
        sysctl_sched_uclamp_util_min_rt_default = old_min_rt;
-done:
-       mutex_unlock(&uclamp_mutex);
-
        return result;
 }
 #endif
@@ -3413,7 +3433,6 @@ static int migrate_swap_stop(void *data)
 {
        struct migration_swap_arg *arg = data;
        struct rq *src_rq, *dst_rq;
-       int ret = -EAGAIN;
 
        if (!cpu_active(arg->src_cpu) || !cpu_active(arg->dst_cpu))
                return -EAGAIN;
@@ -3421,33 +3440,25 @@ static int migrate_swap_stop(void *data)
        src_rq = cpu_rq(arg->src_cpu);
        dst_rq = cpu_rq(arg->dst_cpu);
 
-       double_raw_lock(&arg->src_task->pi_lock,
-                       &arg->dst_task->pi_lock);
-       double_rq_lock(src_rq, dst_rq);
+       guard(double_raw_spinlock)(&arg->src_task->pi_lock, &arg->dst_task->pi_lock);
+       guard(double_rq_lock)(src_rq, dst_rq);
 
        if (task_cpu(arg->dst_task) != arg->dst_cpu)
-               goto unlock;
+               return -EAGAIN;
 
        if (task_cpu(arg->src_task) != arg->src_cpu)
-               goto unlock;
+               return -EAGAIN;
 
        if (!cpumask_test_cpu(arg->dst_cpu, arg->src_task->cpus_ptr))
-               goto unlock;
+               return -EAGAIN;
 
        if (!cpumask_test_cpu(arg->src_cpu, arg->dst_task->cpus_ptr))
-               goto unlock;
+               return -EAGAIN;
 
        __migrate_swap_task(arg->src_task, arg->dst_cpu);
        __migrate_swap_task(arg->dst_task, arg->src_cpu);
 
-       ret = 0;
-
-unlock:
-       double_rq_unlock(src_rq, dst_rq);
-       raw_spin_unlock(&arg->dst_task->pi_lock);
-       raw_spin_unlock(&arg->src_task->pi_lock);
-
-       return ret;
+       return 0;
 }
 
 /*
@@ -3722,14 +3733,14 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
                struct sched_domain *sd;
 
                __schedstat_inc(p->stats.nr_wakeups_remote);
-               rcu_read_lock();
+
+               guard(rcu)();
                for_each_domain(rq->cpu, sd) {
                        if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
                                __schedstat_inc(sd->ttwu_wake_remote);
                                break;
                        }
                }
-               rcu_read_unlock();
        }
 
        if (wake_flags & WF_MIGRATED)
@@ -3928,21 +3939,13 @@ static void __ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags
 void wake_up_if_idle(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
-       struct rq_flags rf;
-
-       rcu_read_lock();
 
-       if (!is_idle_task(rcu_dereference(rq->curr)))
-               goto out;
-
-       rq_lock_irqsave(rq, &rf);
-       if (is_idle_task(rq->curr))
-               resched_curr(rq);
-       /* Else CPU is not idle, do nothing here: */
-       rq_unlock_irqrestore(rq, &rf);
-
-out:
-       rcu_read_unlock();
+       guard(rcu)();
+       if (is_idle_task(rcu_dereference(rq->curr))) {
+               guard(rq_lock_irqsave)(rq);
+               if (is_idle_task(rq->curr))
+                       resched_curr(rq);
+       }
 }
 
 bool cpus_share_cache(int this_cpu, int that_cpu)
@@ -4193,13 +4196,11 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
  * Return: %true if @p->state changes (an actual wakeup was done),
  *        %false otherwise.
  */
-static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-       unsigned long flags;
+       guard(preempt)();
        int cpu, success = 0;
 
-       preempt_disable();
        if (p == current) {
                /*
                 * We're waking current, this means 'p->on_rq' and 'task_cpu(p)
@@ -4226,129 +4227,127 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * reordered with p->state check below. This pairs with smp_store_mb()
         * in set_current_state() that the waiting thread does.
         */
-       raw_spin_lock_irqsave(&p->pi_lock, flags);
-       smp_mb__after_spinlock();
-       if (!ttwu_state_match(p, state, &success))
-               goto unlock;
+       scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+               smp_mb__after_spinlock();
+               if (!ttwu_state_match(p, state, &success))
+                       break;
 
-       trace_sched_waking(p);
+               trace_sched_waking(p);
 
-       /*
-        * Ensure we load p->on_rq _after_ p->state, otherwise it would
-        * be possible to, falsely, observe p->on_rq == 0 and get stuck
-        * in smp_cond_load_acquire() below.
-        *
-        * sched_ttwu_pending()                 try_to_wake_up()
-        *   STORE p->on_rq = 1                   LOAD p->state
-        *   UNLOCK rq->lock
-        *
-        * __schedule() (switch to task 'p')
-        *   LOCK rq->lock                        smp_rmb();
-        *   smp_mb__after_spinlock();
-        *   UNLOCK rq->lock
-        *
-        * [task p]
-        *   STORE p->state = UNINTERRUPTIBLE     LOAD p->on_rq
-        *
-        * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-        * __schedule().  See the comment for smp_mb__after_spinlock().
-        *
-        * A similar smb_rmb() lives in try_invoke_on_locked_down_task().
-        */
-       smp_rmb();
-       if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
-               goto unlock;
+               /*
+                * Ensure we load p->on_rq _after_ p->state, otherwise it would
+                * be possible to, falsely, observe p->on_rq == 0 and get stuck
+                * in smp_cond_load_acquire() below.
+                *
+                * sched_ttwu_pending()                 try_to_wake_up()
+                *   STORE p->on_rq = 1                   LOAD p->state
+                *   UNLOCK rq->lock
+                *
+                * __schedule() (switch to task 'p')
+                *   LOCK rq->lock                        smp_rmb();
+                *   smp_mb__after_spinlock();
+                *   UNLOCK rq->lock
+                *
+                * [task p]
+                *   STORE p->state = UNINTERRUPTIBLE     LOAD p->on_rq
+                *
+                * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+                * __schedule().  See the comment for smp_mb__after_spinlock().
+                *
+                * A similar smp_rmb() lives in try_invoke_on_locked_down_task().
+                */
+               smp_rmb();
+               if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
+                       break;
 
 #ifdef CONFIG_SMP
-       /*
-        * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
-        * possible to, falsely, observe p->on_cpu == 0.
-        *
-        * One must be running (->on_cpu == 1) in order to remove oneself
-        * from the runqueue.
-        *
-        * __schedule() (switch to task 'p')    try_to_wake_up()
-        *   STORE p->on_cpu = 1                  LOAD p->on_rq
-        *   UNLOCK rq->lock
-        *
-        * __schedule() (put 'p' to sleep)
-        *   LOCK rq->lock                        smp_rmb();
-        *   smp_mb__after_spinlock();
-        *   STORE p->on_rq = 0                   LOAD p->on_cpu
-        *
-        * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
-        * __schedule().  See the comment for smp_mb__after_spinlock().
-        *
-        * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
-        * schedule()'s deactivate_task() has 'happened' and p will no longer
-        * care about it's own p->state. See the comment in __schedule().
-        */
-       smp_acquire__after_ctrl_dep();
+               /*
+                * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
+                * possible to, falsely, observe p->on_cpu == 0.
+                *
+                * One must be running (->on_cpu == 1) in order to remove oneself
+                * from the runqueue.
+                *
+                * __schedule() (switch to task 'p')    try_to_wake_up()
+                *   STORE p->on_cpu = 1                  LOAD p->on_rq
+                *   UNLOCK rq->lock
+                *
+                * __schedule() (put 'p' to sleep)
+                *   LOCK rq->lock                        smp_rmb();
+                *   smp_mb__after_spinlock();
+                *   STORE p->on_rq = 0                   LOAD p->on_cpu
+                *
+                * Pairs with the LOCK+smp_mb__after_spinlock() on rq->lock in
+                * __schedule().  See the comment for smp_mb__after_spinlock().
+                *
+                * Form a control-dep-acquire with p->on_rq == 0 above, to ensure
+                * schedule()'s deactivate_task() has 'happened' and p will no longer
+                * care about its own p->state. See the comment in __schedule().
+                */
+               smp_acquire__after_ctrl_dep();
 
-       /*
-        * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
-        * == 0), which means we need to do an enqueue, change p->state to
-        * TASK_WAKING such that we can unlock p->pi_lock before doing the
-        * enqueue, such as ttwu_queue_wakelist().
-        */
-       WRITE_ONCE(p->__state, TASK_WAKING);
+               /*
+                * We're doing the wakeup (@success == 1), they did a dequeue (p->on_rq
+                * == 0), which means we need to do an enqueue, change p->state to
+                * TASK_WAKING such that we can unlock p->pi_lock before doing the
+                * enqueue, such as ttwu_queue_wakelist().
+                */
+               WRITE_ONCE(p->__state, TASK_WAKING);
 
-       /*
-        * If the owning (remote) CPU is still in the middle of schedule() with
-        * this task as prev, considering queueing p on the remote CPUs wake_list
-        * which potentially sends an IPI instead of spinning on p->on_cpu to
-        * let the waker make forward progress. This is safe because IRQs are
-        * disabled and the IPI will deliver after on_cpu is cleared.
-        *
-        * Ensure we load task_cpu(p) after p->on_cpu:
-        *
-        * set_task_cpu(p, cpu);
-        *   STORE p->cpu = @cpu
-        * __schedule() (switch to task 'p')
-        *   LOCK rq->lock
-        *   smp_mb__after_spin_lock()          smp_cond_load_acquire(&p->on_cpu)
-        *   STORE p->on_cpu = 1                LOAD p->cpu
-        *
-        * to ensure we observe the correct CPU on which the task is currently
-        * scheduling.
-        */
-       if (smp_load_acquire(&p->on_cpu) &&
-           ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
-               goto unlock;
+               /*
+                * If the owning (remote) CPU is still in the middle of schedule() with
+                * this task as prev, consider queueing p on the remote CPU's wake_list,
+                * which potentially sends an IPI instead of spinning on p->on_cpu to
+                * let the waker make forward progress. This is safe because IRQs are
+                * disabled and the IPI will deliver after on_cpu is cleared.
+                *
+                * Ensure we load task_cpu(p) after p->on_cpu:
+                *
+                * set_task_cpu(p, cpu);
+                *   STORE p->cpu = @cpu
+                * __schedule() (switch to task 'p')
+                *   LOCK rq->lock
+                *   smp_mb__after_spin_lock()          smp_cond_load_acquire(&p->on_cpu)
+                *   STORE p->on_cpu = 1                LOAD p->cpu
+                *
+                * to ensure we observe the correct CPU on which the task is currently
+                * scheduling.
+                */
+               if (smp_load_acquire(&p->on_cpu) &&
+                   ttwu_queue_wakelist(p, task_cpu(p), wake_flags))
+                       break;
 
-       /*
-        * If the owning (remote) CPU is still in the middle of schedule() with
-        * this task as prev, wait until it's done referencing the task.
-        *
-        * Pairs with the smp_store_release() in finish_task().
-        *
-        * This ensures that tasks getting woken will be fully ordered against
-        * their previous state and preserve Program Order.
-        */
-       smp_cond_load_acquire(&p->on_cpu, !VAL);
+               /*
+                * If the owning (remote) CPU is still in the middle of schedule() with
+                * this task as prev, wait until it's done referencing the task.
+                *
+                * Pairs with the smp_store_release() in finish_task().
+                *
+                * This ensures that tasks getting woken will be fully ordered against
+                * their previous state and preserve Program Order.
+                */
+               smp_cond_load_acquire(&p->on_cpu, !VAL);
 
-       cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
-       if (task_cpu(p) != cpu) {
-               if (p->in_iowait) {
-                       delayacct_blkio_end(p);
-                       atomic_dec(&task_rq(p)->nr_iowait);
-               }
+               cpu = select_task_rq(p, p->wake_cpu, wake_flags | WF_TTWU);
+               if (task_cpu(p) != cpu) {
+                       if (p->in_iowait) {
+                               delayacct_blkio_end(p);
+                               atomic_dec(&task_rq(p)->nr_iowait);
+                       }
 
-               wake_flags |= WF_MIGRATED;
-               psi_ttwu_dequeue(p);
-               set_task_cpu(p, cpu);
-       }
+                       wake_flags |= WF_MIGRATED;
+                       psi_ttwu_dequeue(p);
+                       set_task_cpu(p, cpu);
+               }
 #else
-       cpu = task_cpu(p);
+               cpu = task_cpu(p);
 #endif /* CONFIG_SMP */
 
-       ttwu_queue(p, cpu, wake_flags);
-unlock:
-       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               ttwu_queue(p, cpu, wake_flags);
+       }
 out:
        if (success)
                ttwu_stat(p, task_cpu(p), wake_flags);
-       preempt_enable();
 
        return success;
 }
@@ -4501,6 +4500,8 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        p->se.prev_sum_exec_runtime     = 0;
        p->se.nr_migrations             = 0;
        p->se.vruntime                  = 0;
+       p->se.vlag                      = 0;
+       p->se.slice                     = sysctl_sched_base_slice;
        INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -5496,23 +5497,20 @@ unsigned int nr_iowait(void)
 void sched_exec(void)
 {
        struct task_struct *p = current;
-       unsigned long flags;
+       struct migration_arg arg;
        int dest_cpu;
 
-       raw_spin_lock_irqsave(&p->pi_lock, flags);
-       dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
-       if (dest_cpu == smp_processor_id())
-               goto unlock;
+       scoped_guard (raw_spinlock_irqsave, &p->pi_lock) {
+               dest_cpu = p->sched_class->select_task_rq(p, task_cpu(p), WF_EXEC);
+               if (dest_cpu == smp_processor_id())
+                       return;
 
-       if (likely(cpu_active(dest_cpu))) {
-               struct migration_arg arg = { p, dest_cpu };
+               if (unlikely(!cpu_active(dest_cpu)))
+                       return;
 
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-               stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
-               return;
+               arg = (struct migration_arg){ p, dest_cpu };
        }
-unlock:
-       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+       stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
 }
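+
+/*
+ * Pattern note (sketch): scoped_guard() confines p->pi_lock to the
+ * block, so 'arg' is filled in under the lock while the potentially
+ * sleeping stop_one_cpu() runs after the guard has dropped it; an
+ * early 'return' inside the block likewise exits with the lock
+ * already released.
+ */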
 
 #endif
@@ -5722,9 +5720,6 @@ static void sched_tick_remote(struct work_struct *work)
        struct tick_work *twork = container_of(dwork, struct tick_work, work);
        int cpu = twork->cpu;
        struct rq *rq = cpu_rq(cpu);
-       struct task_struct *curr;
-       struct rq_flags rf;
-       u64 delta;
        int os;
 
        /*
@@ -5734,30 +5729,26 @@ static void sched_tick_remote(struct work_struct *work)
         * statistics and checks timeslices in a time-independent way, regardless
         * of when exactly it is running.
         */
-       if (!tick_nohz_tick_stopped_cpu(cpu))
-               goto out_requeue;
+       if (tick_nohz_tick_stopped_cpu(cpu)) {
+               guard(rq_lock_irq)(rq);
+               struct task_struct *curr = rq->curr;
 
-       rq_lock_irq(rq, &rf);
-       curr = rq->curr;
-       if (cpu_is_offline(cpu))
-               goto out_unlock;
+               if (cpu_online(cpu)) {
+                       update_rq_clock(rq);
 
-       update_rq_clock(rq);
+                       if (!is_idle_task(curr)) {
+                               /*
+                                * Make sure the next tick runs within a
+                                * reasonable amount of time.
+                                */
+                               u64 delta = rq_clock_task(rq) - curr->se.exec_start;
+                               WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+                       }
+                       curr->sched_class->task_tick(rq, curr, 0);
 
-       if (!is_idle_task(curr)) {
-               /*
-                * Make sure the next tick runs within a reasonable
-                * amount of time.
-                */
-               delta = rq_clock_task(rq) - curr->se.exec_start;
-               WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
+                       calc_load_nohz_remote(rq);
+               }
        }
-       curr->sched_class->task_tick(rq, curr, 0);
-
-       calc_load_nohz_remote(rq);
-out_unlock:
-       rq_unlock_irq(rq, &rf);
-out_requeue:
 
        /*
         * Run the remote tick once per second (1Hz). This arbitrary
@@ -6306,19 +6297,19 @@ static bool try_steal_cookie(int this, int that)
        unsigned long cookie;
        bool success = false;
 
-       local_irq_disable();
-       double_rq_lock(dst, src);
+       guard(irq)();
+       guard(double_rq_lock)(dst, src);
 
        cookie = dst->core->core_cookie;
        if (!cookie)
-               goto unlock;
+               return false;
 
        if (dst->curr != dst->idle)
-               goto unlock;
+               return false;
 
        p = sched_core_find(src, cookie);
        if (!p)
-               goto unlock;
+               return false;
 
        do {
                if (p == src->core_pick || p == src->curr)
@@ -6330,9 +6321,10 @@ static bool try_steal_cookie(int this, int that)
                if (p->core_occupation > dst->idle->core_occupation)
                        goto next;
                /*
-                * sched_core_find() and sched_core_next() will ensure that task @p
-                * is not throttled now, we also need to check whether the runqueue
-                * of the destination CPU is being throttled.
+                * sched_core_find() and sched_core_next() will ensure
+                * that task @p is not throttled now; we also need to
+                * check whether the runqueue of the destination CPU is
+                * being throttled.
                 */
                if (sched_task_is_throttled(p, this))
                        goto next;
@@ -6350,10 +6342,6 @@ next:
                p = sched_core_next(p, cookie);
        } while (p);
 
-unlock:
-       double_rq_unlock(dst, src);
-       local_irq_enable();
-
        return success;
 }
 
@@ -6411,20 +6399,24 @@ static void queue_core_balance(struct rq *rq)
        queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }
 
+DEFINE_LOCK_GUARD_1(core_lock, int,
+                   sched_core_lock(*_T->lock, &_T->flags),
+                   sched_core_unlock(*_T->lock, &_T->flags),
+                   unsigned long flags)
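+
+/*
+ * Conceptually (a sketch of the macro expansion), this defines a guard
+ * type whose constructor runs sched_core_lock(*cpu, &flags) and whose
+ * destructor runs sched_core_unlock(*cpu, &flags), so that
+ * guard(core_lock)(&cpu) below pairs lock/unlock on every return path.
+ */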
+
 static void sched_core_cpu_starting(unsigned int cpu)
 {
        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
        struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
-       unsigned long flags;
        int t;
 
-       sched_core_lock(cpu, &flags);
+       guard(core_lock)(&cpu);
 
        WARN_ON_ONCE(rq->core != rq);
 
        /* if we're the first, we'll be our own leader */
        if (cpumask_weight(smt_mask) == 1)
-               goto unlock;
+               return;
 
        /* find the leader */
        for_each_cpu(t, smt_mask) {
@@ -6438,7 +6430,7 @@ static void sched_core_cpu_starting(unsigned int cpu)
        }
 
        if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
-               goto unlock;
+               return;
 
        /* install and validate core_rq */
        for_each_cpu(t, smt_mask) {
@@ -6449,29 +6441,25 @@ static void sched_core_cpu_starting(unsigned int cpu)
 
                WARN_ON_ONCE(rq->core != core_rq);
        }
-
-unlock:
-       sched_core_unlock(cpu, &flags);
 }
 
 static void sched_core_cpu_deactivate(unsigned int cpu)
 {
        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
        struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
-       unsigned long flags;
        int t;
 
-       sched_core_lock(cpu, &flags);
+       guard(core_lock)(&cpu);
 
        /* if we're the last man standing, nothing to do */
        if (cpumask_weight(smt_mask) == 1) {
                WARN_ON_ONCE(rq->core != rq);
-               goto unlock;
+               return;
        }
 
        /* if we're not the leader, nothing to do */
        if (rq->core != rq)
-               goto unlock;
+               return;
 
        /* find a new leader */
        for_each_cpu(t, smt_mask) {
@@ -6482,7 +6470,7 @@ static void sched_core_cpu_deactivate(unsigned int cpu)
        }
 
        if (WARN_ON_ONCE(!core_rq)) /* impossible */
-               goto unlock;
+               return;
 
        /* copy the shared state to the new leader */
        core_rq->core_task_seq             = rq->core_task_seq;
@@ -6504,9 +6492,6 @@ static void sched_core_cpu_deactivate(unsigned int cpu)
                rq = cpu_rq(t);
                rq->core = core_rq;
        }
-
-unlock:
-       sched_core_unlock(cpu, &flags);
 }
 
 static inline void sched_core_cpu_dying(unsigned int cpu)
@@ -7030,7 +7015,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
 int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
                          void *key)
 {
-       WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
+       WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
        return try_to_wake_up(curr->private, mode, wake_flags);
 }
 EXPORT_SYMBOL(default_wake_function);
@@ -7383,6 +7368,19 @@ struct task_struct *idle_task(int cpu)
        return cpu_rq(cpu)->idle;
 }
 
+#ifdef CONFIG_SCHED_CORE
+int sched_core_idle_cpu(int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+       if (sched_core_enabled(rq) && rq->curr == rq->idle)
+               return 1;
+
+       return idle_cpu(cpu);
+}
+
+#endif
+
 #ifdef CONFIG_SMP
 /*
  * This function computes an effective utilization for the given CPU, to be
@@ -9940,7 +9938,7 @@ void __init sched_init(void)
                ptr += nr_cpu_ids * sizeof(void **);
 
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
-               init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
+               init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 #ifdef CONFIG_RT_GROUP_SCHED
                root_task_group.rt_se = (struct sched_rt_entity **)ptr;
@@ -11074,11 +11072,16 @@ static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
 
                /*
                 * Ensure max(child_quota) <= parent_quota.  On cgroup2,
-                * always take the min.  On cgroup1, only inherit when no
-                * limit is set:
+                * always take the non-RUNTIME_INF min.  On cgroup1, only
+                * inherit when no limit is set. In both cases this is used
+                * by the scheduler to determine if a given CFS task has a
+                * bandwidth constraint at some higher level.
                 */
                if (cgroup_subsys_on_dfl(cpu_cgrp_subsys)) {
-                       quota = min(quota, parent_quota);
+                       if (quota == RUNTIME_INF)
+                               quota = parent_quota;
+                       else if (parent_quota != RUNTIME_INF)
+                               quota = min(quota, parent_quota);
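+                       /*
+                        * E.g. (illustrative): parent quota 100ms, child
+                        * quota 150ms -> effective 100ms; child
+                        * RUNTIME_INF -> inherits 100ms; both
+                        * RUNTIME_INF -> stays unconstrained.
+                        */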
                } else {
                        if (quota == RUNTIME_INF)
                                quota = parent_quota;
@@ -11139,6 +11142,27 @@ static int cpu_cfs_stat_show(struct seq_file *sf, void *v)
 
        return 0;
 }
+
+static u64 throttled_time_self(struct task_group *tg)
+{
+       int i;
+       u64 total = 0;
+
+       for_each_possible_cpu(i) {
+               total += READ_ONCE(tg->cfs_rq[i]->throttled_clock_self_time);
+       }
+
+       return total;
+}
+
+static int cpu_cfs_local_stat_show(struct seq_file *sf, void *v)
+{
+       struct task_group *tg = css_tg(seq_css(sf));
+
+       seq_printf(sf, "throttled_time %llu\n", throttled_time_self(tg));
+
+       return 0;
+}
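+
+/*
+ * Userspace view on cgroup1 (illustrative): the new "stat.local" file
+ * reports nanoseconds during which this group itself was throttled,
+ * excluding throttling inherited from ancestors:
+ *
+ *	$ cat /sys/fs/cgroup/cpu/grp/cpu.stat.local
+ *	throttled_time 123456789
+ */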
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -11215,6 +11239,10 @@ static struct cftype cpu_legacy_files[] = {
                .name = "stat",
                .seq_show = cpu_cfs_stat_show,
        },
+       {
+               .name = "stat.local",
+               .seq_show = cpu_cfs_local_stat_show,
+       },
 #endif
 #ifdef CONFIG_RT_GROUP_SCHED
        {
@@ -11271,6 +11299,24 @@ static int cpu_extra_stat_show(struct seq_file *sf,
        return 0;
 }
 
+static int cpu_local_stat_show(struct seq_file *sf,
+                              struct cgroup_subsys_state *css)
+{
+#ifdef CONFIG_CFS_BANDWIDTH
+       {
+               struct task_group *tg = css_tg(css);
+               u64 throttled_self_usec;
+
+               throttled_self_usec = throttled_time_self(tg);
+               do_div(throttled_self_usec, NSEC_PER_USEC);
+
+               seq_printf(sf, "throttled_usec %llu\n",
+                          throttled_self_usec);
+       }
+#endif
+       return 0;
+}
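+
+/*
+ * On cgroup2 this surfaces, via css_local_stat_show, as an extra line
+ * in cpu.stat.local (illustrative output):
+ *
+ *	$ cat /sys/fs/cgroup/grp/cpu.stat.local
+ *	throttled_usec 123456
+ */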
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static u64 cpu_weight_read_u64(struct cgroup_subsys_state *css,
                               struct cftype *cft)
@@ -11449,6 +11495,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
        .css_released   = cpu_cgroup_css_released,
        .css_free       = cpu_cgroup_css_free,
        .css_extra_stat_show = cpu_extra_stat_show,
+       .css_local_stat_show = cpu_local_stat_show,
 #ifdef CONFIG_RT_GROUP_SCHED
        .can_attach     = cpu_cgroup_can_attach,
 #endif
index 066ff1c..4c3d0d9 100644 (file)
@@ -347,10 +347,7 @@ static __init int sched_init_debug(void)
        debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
 #endif
 
-       debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
-       debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
-       debugfs_create_u32("idle_min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_idle_min_granularity);
-       debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);
+       debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice);
 
        debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
        debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);
@@ -427,6 +424,7 @@ static void register_sd(struct sched_domain *sd, struct dentry *parent)
 #undef SDM
 
        debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
+       debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops);
 }
 
 void update_sched_domain_debugfs(void)
@@ -581,9 +579,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
        else
                SEQ_printf(m, " %c", task_state_to_char(p));
 
-       SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
+       SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
                p->comm, task_pid_nr(p),
                SPLIT_NS(p->se.vruntime),
+               entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
+               SPLIT_NS(p->se.deadline),
+               SPLIT_NS(p->se.slice),
+               SPLIT_NS(p->se.sum_exec_runtime),
                (long long)(p->nvcsw + p->nivcsw),
                p->prio);
 
@@ -626,10 +628,9 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 
 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 {
-       s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
-               spread, rq0_min_vruntime, spread0;
+       s64 left_vruntime = -1, min_vruntime, right_vruntime = -1, spread;
+       struct sched_entity *last, *first;
        struct rq *rq = cpu_rq(cpu);
-       struct sched_entity *last;
        unsigned long flags;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -643,26 +644,25 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
                        SPLIT_NS(cfs_rq->exec_clock));
 
        raw_spin_rq_lock_irqsave(rq, flags);
-       if (rb_first_cached(&cfs_rq->tasks_timeline))
-               MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
+       first = __pick_first_entity(cfs_rq);
+       if (first)
+               left_vruntime = first->vruntime;
        last = __pick_last_entity(cfs_rq);
        if (last)
-               max_vruntime = last->vruntime;
+               right_vruntime = last->vruntime;
        min_vruntime = cfs_rq->min_vruntime;
-       rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
        raw_spin_rq_unlock_irqrestore(rq, flags);
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
-                       SPLIT_NS(MIN_vruntime));
+
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "left_vruntime",
+                       SPLIT_NS(left_vruntime));
        SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
                        SPLIT_NS(min_vruntime));
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
-                       SPLIT_NS(max_vruntime));
-       spread = max_vruntime - MIN_vruntime;
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
-                       SPLIT_NS(spread));
-       spread0 = min_vruntime - rq0_min_vruntime;
-       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
-                       SPLIT_NS(spread0));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "avg_vruntime",
+                       SPLIT_NS(avg_vruntime(cfs_rq)));
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "right_vruntime",
+                       SPLIT_NS(right_vruntime));
+       spread = right_vruntime - left_vruntime;
+       SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread));
        SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
                        cfs_rq->nr_spread_over);
        SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
@@ -863,10 +863,7 @@ static void sched_debug_header(struct seq_file *m)
        SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
 #define PN(x) \
        SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
-       PN(sysctl_sched_latency);
-       PN(sysctl_sched_min_granularity);
-       PN(sysctl_sched_idle_min_granularity);
-       PN(sysctl_sched_wakeup_granularity);
+       PN(sysctl_sched_base_slice);
        P(sysctl_sched_child_runs_first);
        P(sysctl_sched_features);
 #undef PN
index a80a739..911d006 100644 (file)
@@ -47,6 +47,7 @@
 #include <linux/psi.h>
 #include <linux/ratelimit.h>
 #include <linux/task_work.h>
+#include <linux/rbtree_augmented.h>
 
 #include <asm/switch_to.h>
 
 #include "autogroup.h"
 
 /*
- * Targeted preemption latency for CPU-bound tasks:
- *
- * NOTE: this latency value is not the same as the concept of
- * 'timeslice length' - timeslices in CFS are of variable length
- * and have no persistent notion like in traditional, time-slice
- * based scheduling concepts.
- *
- * (to see the precise effective timeslice length of your workload,
- *  run vmstat and monitor the context-switches (cs) field)
- *
- * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
- */
-unsigned int sysctl_sched_latency                      = 6000000ULL;
-static unsigned int normalized_sysctl_sched_latency    = 6000000ULL;
-
-/*
  * The initial- and re-scaling of tunables is configurable
  *
  * Options are:
@@ -90,21 +75,8 @@ unsigned int sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;
  *
  * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
  */
-unsigned int sysctl_sched_min_granularity                      = 750000ULL;
-static unsigned int normalized_sysctl_sched_min_granularity    = 750000ULL;
-
-/*
- * Minimal preemption granularity for CPU-bound SCHED_IDLE tasks.
- * Applies only when SCHED_IDLE tasks compete with normal tasks.
- *
- * (default: 0.75 msec)
- */
-unsigned int sysctl_sched_idle_min_granularity                 = 750000ULL;
-
-/*
- * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
- */
-static unsigned int sched_nr_latency = 8;
+unsigned int sysctl_sched_base_slice                   = 750000ULL;
+static unsigned int normalized_sysctl_sched_base_slice = 750000ULL;
 
 /*
  * After fork, child runs first. If set to 0 (default) then
@@ -112,18 +84,6 @@ static unsigned int sched_nr_latency = 8;
  */
 unsigned int sysctl_sched_child_runs_first __read_mostly;
 
-/*
- * SCHED_OTHER wake-up granularity.
- *
- * This option delays the preemption effects of decoupled workloads
- * and reduces their over-scheduling. Synchronous workloads will still
- * have immediate wakeup/sleep latencies.
- *
- * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
- */
-unsigned int sysctl_sched_wakeup_granularity                   = 1000000UL;
-static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;
-
 const_debug unsigned int sysctl_sched_migration_cost   = 500000UL;
 
 int sched_thermal_decay_shift;
@@ -277,9 +237,7 @@ static void update_sysctl(void)
 
 #define SET_SYSCTL(name) \
        (sysctl_##name = (factor) * normalized_sysctl_##name)
-       SET_SYSCTL(sched_min_granularity);
-       SET_SYSCTL(sched_latency);
-       SET_SYSCTL(sched_wakeup_granularity);
+       SET_SYSCTL(sched_base_slice);
 #undef SET_SYSCTL
 }
 
@@ -347,6 +305,16 @@ static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight
        return mul_u64_u32_shr(delta_exec, fact, shift);
 }
 
+/*
+ * delta /= w
+ */
+static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
+{
+       if (unlikely(se->load.weight != NICE_0_LOAD))
+               delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
+
+       return delta;
+}
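+
+/*
+ * Worked example (assumed weights): for a NICE_0_LOAD entity this is a
+ * no-op.  For an entity at half weight, delta is scaled by
+ * NICE_0_LOAD / (NICE_0_LOAD/2) = 2: a 1ms wall-clock delta advances
+ * its vruntime by ~2ms, so lighter entities age faster in virtual time.
+ */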
 
 const struct sched_class fair_sched_class;
 
@@ -601,13 +569,198 @@ static inline bool entity_before(const struct sched_entity *a,
        return (s64)(a->vruntime - b->vruntime) < 0;
 }
 
+static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       return (s64)(se->vruntime - cfs_rq->min_vruntime);
+}
+
 #define __node_2_se(node) \
        rb_entry((node), struct sched_entity, run_node)
 
+/*
+ * Compute virtual time from the per-task service numbers:
+ *
+ * Fair schedulers conserve lag:
+ *
+ *   \Sum lag_i = 0
+ *
+ * Where lag_i is given by:
+ *
+ *   lag_i = S - s_i = w_i * (V - v_i)
+ *
+ * Where S is the ideal service time and V is its virtual time counterpart.
+ * Therefore:
+ *
+ *   \Sum lag_i = 0
+ *   \Sum w_i * (V - v_i) = 0
+ *   \Sum w_i * V - w_i * v_i = 0
+ *
+ * From which we can solve an expression for V in v_i (which we have in
+ * se->vruntime):
+ *
+ *       \Sum v_i * w_i   \Sum v_i * w_i
+ *   V = -------------- = --------------
+ *          \Sum w_i            W
+ *
+ * Specifically, this is the weighted average of all entity virtual runtimes.
+ *
+ * [[ NOTE: this is only equal to the ideal scheduler under the condition
+ *          that join/leave operations happen at lag_i = 0, otherwise the
+ *          virtual time has non-contiguous motion equivalent to:
+ *
+ *           V +-= lag_i / W
+ *
+ *         Also see the comment in place_entity() that deals with this. ]]
+ *
+ * However, since v_i is u64 and the multiplication could easily
+ * overflow, transform it into a relative form that uses smaller
+ * quantities:
+ *
+ * Substitute: v_i == (v_i - v0) + v0
+ *
+ *     \Sum ((v_i - v0) + v0) * w_i   \Sum (v_i - v0) * w_i
+ * V = ---------------------------- = --------------------- + v0
+ *                  W                            W
+ *
+ * Which we track using:
+ *
+ *                    v0 := cfs_rq->min_vruntime
+ * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
+ *              \Sum w_i := cfs_rq->avg_load
+ *
+ * Since min_vruntime is a monotonically increasing variable that closely tracks
+ * the per-task service, these deltas: (v_i - v0), will be on the order of the
+ * maximal (virtual) lag induced in the system due to quantisation.
+ *
+ * Also, we use scale_load_down() to reduce the size.
+ *
+ * As measured, the max (key * weight) value was ~44 bits for a kernel build.
+ */
+static void
+avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       unsigned long weight = scale_load_down(se->load.weight);
+       s64 key = entity_key(cfs_rq, se);
+
+       cfs_rq->avg_vruntime += key * weight;
+       cfs_rq->avg_load += weight;
+}
+
+static void
+avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       unsigned long weight = scale_load_down(se->load.weight);
+       s64 key = entity_key(cfs_rq, se);
+
+       cfs_rq->avg_vruntime -= key * weight;
+       cfs_rq->avg_load -= weight;
+}
+
+static inline
+void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
+{
+       /*
+        * v' = v + d ==> avg_vruntime' = avg_vruntime - d*avg_load
+        */
+       cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
+}
+
+u64 avg_vruntime(struct cfs_rq *cfs_rq)
+{
+       struct sched_entity *curr = cfs_rq->curr;
+       s64 avg = cfs_rq->avg_vruntime;
+       long load = cfs_rq->avg_load;
+
+       if (curr && curr->on_rq) {
+               unsigned long weight = scale_load_down(curr->load.weight);
+
+               avg += entity_key(cfs_rq, curr) * weight;
+               load += weight;
+       }
+
+       if (load)
+               avg = div_s64(avg, load);
+
+       return cfs_rq->min_vruntime + avg;
+}
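
As an aside, not part of the patch: a minimal userspace sketch of the relative-form average derived above. The entity array and all values are invented; the point is that keying each v_i against v0 = min_vruntime keeps the products small enough for 64-bit math, and V is recovered as v0 + avg.

#include <stdio.h>
#include <stdint.h>

struct entity { uint64_t vruntime; unsigned long weight; };

static uint64_t weighted_avg_vruntime(const struct entity *e, int n,
				      uint64_t min_vruntime)
{
	int64_t avg = 0;
	long load = 0;

	for (int i = 0; i < n; i++) {
		int64_t key = (int64_t)(e[i].vruntime - min_vruntime);

		avg  += key * (long)e[i].weight;	/* \Sum (v_i - v0) * w_i */
		load += e[i].weight;			/* \Sum w_i */
	}
	if (load)
		avg /= load;
	return min_vruntime + avg;
}

int main(void)
{
	struct entity e[] = { { 1000, 1024 }, { 1300, 512 }, { 900, 2048 } };

	/* prints V = 985: v0 = 900 plus the weighted average key of 85 */
	printf("V = %llu\n",
	       (unsigned long long)weighted_avg_vruntime(e, 3, 900));
	return 0;
}
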
+
+/*
+ * lag_i = S - s_i = w_i * (V - v_i)
+ *
+ * However, since V is approximated by the weighted average of all entities,
+ * it is possible -- by addition/removal/reweight of entries in the tree --
+ * to move V around and end up with a larger lag than we started with.
+ *
+ * Limit this to double the slice length, with a minimum of TICK_NSEC, since
+ * that is the timing granularity.
+ *
+ * EEVDF gives the following limit for a steady state system:
+ *
+ *   -r_max < lag < max(r_max, q)
+ *
+ * XXX could add max_slice to the augmented data to track this.
+ */
+void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       s64 lag, limit;
+
+       SCHED_WARN_ON(!se->on_rq);
+       lag = avg_vruntime(cfs_rq) - se->vruntime;
+
+       limit = calc_delta_fair(max_t(u64, 2*se->slice, TICK_NSEC), se);
+       se->vlag = clamp(lag, -limit, limit);
+}
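
Another aside, not part of the patch: the clamp above in miniature. TICK_NSEC is assumed to be 4 ms (HZ=250) and all other values are invented; the raw lag is cut off at twice the virtual slice, floored at one tick, so join/leave noise cannot accumulate unbounded credit or debt.

#include <stdio.h>

#define TICK_NSEC 4000000LL	/* assumes HZ=250 */

static long long clampll(long long v, long long lo, long long hi)
{
	return v < lo ? lo : v > hi ? hi : v;
}

int main(void)
{
	long long vslice = 750000;	/* virtual slice at NICE_0_LOAD */
	long long lag = 9000000;	/* raw V - v_i, far too large */
	long long limit = 2 * vslice > TICK_NSEC ? 2 * vslice : TICK_NSEC;

	/* prints vlag = 4000000: clamped to max(2*slice, TICK_NSEC) */
	printf("vlag = %lld\n", clampll(lag, -limit, limit));
	return 0;
}
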
+
+/*
+ * Entity is eligible once it has received less service than it ought to
+ * have, i.e. lag >= 0.
+ *
+ * lag_i = S - s_i = w_i*(V - v_i)
+ *
+ * lag_i >= 0 -> V >= v_i
+ *
+ *     \Sum (v_i - v)*w_i
+ * V = ------------------ + v
+ *          \Sum w_i
+ *
+ * lag_i >= 0 -> \Sum (v_i - v)*w_i >= (v_i - v)*(\Sum w_i)
+ *
+ * Note: using 'avg_vruntime() > se->vruntime' is inaccurate due
+ *       to the loss in precision caused by the division.
+ */
+int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       struct sched_entity *curr = cfs_rq->curr;
+       s64 avg = cfs_rq->avg_vruntime;
+       long load = cfs_rq->avg_load;
+
+       if (curr && curr->on_rq) {
+               unsigned long weight = scale_load_down(curr->load.weight);
+
+               avg += entity_key(cfs_rq, curr) * weight;
+               load += weight;
+       }
+
+       return avg >= entity_key(cfs_rq, se) * load;
+}
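
A short aside, not part of the patch, on the precision note above: C integer division truncates toward zero, so for negative averages 'avg / load' rounds the wrong way and can flip the eligibility verdict; keeping the multiplication avoids the division entirely. Values are invented.

#include <stdio.h>

int main(void)
{
	long long avg = -7, key = -1;	/* V - v0 = -7/4, v_i - v0 = -1 */
	long load = 4;

	/* divide first: -7/4 truncates to -1, and -1 >= -1 wrongly
	 * claims the entity is eligible */
	printf("divide first : %d\n", avg / load >= key);

	/* multiply form: -7 >= -4 is false, correctly ineligible */
	printf("multiply form: %d\n", avg >= key * load);
	return 0;
}
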
+
+static u64 __update_min_vruntime(struct cfs_rq *cfs_rq, u64 vruntime)
+{
+       u64 min_vruntime = cfs_rq->min_vruntime;
+       /*
+        * open coded max_vruntime() to allow updating avg_vruntime
+        */
+       s64 delta = (s64)(vruntime - min_vruntime);
+       if (delta > 0) {
+               avg_vruntime_update(cfs_rq, delta);
+               min_vruntime = vruntime;
+       }
+       return min_vruntime;
+}
+
 static void update_min_vruntime(struct cfs_rq *cfs_rq)
 {
+       struct sched_entity *se = __pick_first_entity(cfs_rq);
        struct sched_entity *curr = cfs_rq->curr;
-       struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline);
 
        u64 vruntime = cfs_rq->min_vruntime;
 
@@ -618,9 +771,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
                        curr = NULL;
        }
 
-       if (leftmost) { /* non-empty tree */
-               struct sched_entity *se = __node_2_se(leftmost);
-
+       if (se) {
                if (!curr)
                        vruntime = se->vruntime;
                else
@@ -629,7 +780,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 
        /* ensure we never gain time by being placed backwards. */
        u64_u32_store(cfs_rq->min_vruntime,
-                     max_vruntime(cfs_rq->min_vruntime, vruntime));
+                     __update_min_vruntime(cfs_rq, vruntime));
 }
 
 static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
@@ -637,17 +788,51 @@ static inline bool __entity_less(struct rb_node *a, const struct rb_node *b)
        return entity_before(__node_2_se(a), __node_2_se(b));
 }
 
+#define deadline_gt(field, lse, rse) ({ (s64)((lse)->field - (rse)->field) > 0; })
+
+static inline void __update_min_deadline(struct sched_entity *se, struct rb_node *node)
+{
+       if (node) {
+               struct sched_entity *rse = __node_2_se(node);
+               if (deadline_gt(min_deadline, se, rse))
+                       se->min_deadline = rse->min_deadline;
+       }
+}
+
+/*
+ * se->min_deadline = min(se->deadline, left->min_deadline, right->min_deadline)
+ */
+static inline bool min_deadline_update(struct sched_entity *se, bool exit)
+{
+       u64 old_min_deadline = se->min_deadline;
+       struct rb_node *node = &se->run_node;
+
+       se->min_deadline = se->deadline;
+       __update_min_deadline(se, node->rb_right);
+       __update_min_deadline(se, node->rb_left);
+
+       return se->min_deadline == old_min_deadline;
+}
+
+RB_DECLARE_CALLBACKS(static, min_deadline_cb, struct sched_entity,
+                    run_node, min_deadline, min_deadline_update);
+
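As an aside, not part of the patch: the augment rule maintained by min_deadline_cb, shown on a hand-built three-node tree instead of the kernel's rbtree machinery. Each node caches the minimum deadline of its subtree, so a search can skip any subtree whose min_deadline is already too large.

#include <stdio.h>
#include <stdint.h>

struct node {
	uint64_t deadline, min_deadline;
	struct node *left, *right;
};

static void recompute_min_deadline(struct node *n)
{
	n->min_deadline = n->deadline;
	if (n->left && n->left->min_deadline < n->min_deadline)
		n->min_deadline = n->left->min_deadline;
	if (n->right && n->right->min_deadline < n->min_deadline)
		n->min_deadline = n->right->min_deadline;
}

int main(void)
{
	struct node l = { 120, 0, NULL, NULL };
	struct node r = { 300, 0, NULL, NULL };
	struct node root = { 200, 0, &l, &r };

	/* propagate bottom-up, as the rbtree callbacks do after insert */
	recompute_min_deadline(&l);
	recompute_min_deadline(&r);
	recompute_min_deadline(&root);

	/* prints 120: the left child's deadline dominates the subtree */
	printf("root->min_deadline = %llu\n",
	       (unsigned long long)root.min_deadline);
	return 0;
}
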
 /*
  * Enqueue an entity into the rb-tree:
  */
 static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-       rb_add_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less);
+       avg_vruntime_add(cfs_rq, se);
+       se->min_deadline = se->deadline;
+       rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+                               __entity_less, &min_deadline_cb);
 }
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-       rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
+       rb_erase_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline,
+                                 &min_deadline_cb);
+       avg_vruntime_sub(cfs_rq, se);
 }
 
 struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
@@ -660,14 +845,88 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
        return __node_2_se(left);
 }
 
-static struct sched_entity *__pick_next_entity(struct sched_entity *se)
+/*
+ * Earliest Eligible Virtual Deadline First
+ *
+ * In order to provide latency guarantees for different request sizes,
+ * EEVDF selects the best runnable task using two criteria:
+ *
+ *  1) the task must be eligible (must be owed service)
+ *
+ *  2) from those tasks that meet 1), we select the one
+ *     with the earliest virtual deadline.
+ *
+ * We can do this in O(log n) time using an augmented RB-tree. The
+ * tree keeps the entries sorted on service, but also functions as a
+ * heap based on the deadline by keeping:
+ *
+ *  se->min_deadline = min(se->deadline, se->{left,right}->min_deadline)
+ *
+ * Which allows an EDF-like search on (sub)trees.
+ */
+static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 {
-       struct rb_node *next = rb_next(&se->run_node);
+       struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
+       struct sched_entity *curr = cfs_rq->curr;
+       struct sched_entity *best = NULL;
 
-       if (!next)
-               return NULL;
+       if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
+               curr = NULL;
+
+       /*
+        * Once selected, run a task until it either becomes non-eligible or
+        * until it gets a new slice. See the HACK in set_next_entity().
+        */
+       if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
+               return curr;
+
+       while (node) {
+               struct sched_entity *se = __node_2_se(node);
+
+               /*
+                * If this entity is not eligible, try the left subtree.
+                */
+               if (!entity_eligible(cfs_rq, se)) {
+                       node = node->rb_left;
+                       continue;
+               }
 
-       return __node_2_se(next);
+               /*
+                * If this entity has an earlier deadline than the previous
+                * best, take this one. If it also has the earliest deadline
+                * of its subtree, we're done.
+                */
+               if (!best || deadline_gt(deadline, best, se)) {
+                       best = se;
+                       if (best->deadline == best->min_deadline)
+                               break;
+               }
+
+               /*
+                * If the earliest deadline in this subtree is in the fully
+                * eligible left half of our space, go there.
+                */
+               if (node->rb_left &&
+                   __node_2_se(node->rb_left)->min_deadline == se->min_deadline) {
+                       node = node->rb_left;
+                       continue;
+               }
+
+               node = node->rb_right;
+       }
+
+       if (!best || (curr && deadline_gt(deadline, best, curr)))
+               best = curr;
+
+       if (unlikely(!best)) {
+               struct sched_entity *left = __pick_first_entity(cfs_rq);
+               if (left) {
+                       pr_err("EEVDF scheduling fail, picking leftmost\n");
+                       return left;
+               }
+       }
+
+       return best;
 }
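
An illustration, not part of the patch: the two selection criteria above as a plain O(n) scan over an invented entity array. The kernel gets O(log n) from the min_deadline-augmented tree, but the verdict is the same: among entities with vruntime <= V, pick the earliest virtual deadline.

#include <stdio.h>
#include <stdint.h>

struct entity { int64_t vruntime; int64_t deadline; };

static int pick_eevdf_slow(const struct entity *e, int n, int64_t V)
{
	int best = -1;

	for (int i = 0; i < n; i++) {
		if (e[i].vruntime > V)		/* 1) not eligible */
			continue;
		if (best < 0 || e[i].deadline < e[best].deadline)
			best = i;		/* 2) earliest deadline */
	}
	return best;
}

int main(void)
{
	struct entity e[] = {
		{ 90, 200 },	/* eligible, late deadline */
		{ 95, 120 },	/* eligible, earliest deadline: wins */
		{ 110, 100 },	/* earliest deadline overall, not eligible */
	};

	/* prints: picked entity 1 */
	printf("picked entity %d\n", pick_eevdf_slow(e, 3, 100));
	return 0;
}
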
 
 #ifdef CONFIG_SCHED_DEBUG
@@ -684,109 +943,51 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 /**************************************************************
  * Scheduling class statistics methods:
  */
-
+#ifdef CONFIG_SMP
 int sched_update_scaling(void)
 {
        unsigned int factor = get_update_sysctl_factor();
 
-       sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
-                                       sysctl_sched_min_granularity);
-
 #define WRT_SYSCTL(name) \
        (normalized_sysctl_##name = sysctl_##name / (factor))
-       WRT_SYSCTL(sched_min_granularity);
-       WRT_SYSCTL(sched_latency);
-       WRT_SYSCTL(sched_wakeup_granularity);
+       WRT_SYSCTL(sched_base_slice);
 #undef WRT_SYSCTL
 
        return 0;
 }
 #endif
+#endif
 
-/*
- * delta /= w
- */
-static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
-{
-       if (unlikely(se->load.weight != NICE_0_LOAD))
-               delta = __calc_delta(delta, NICE_0_LOAD, &se->load);
-
-       return delta;
-}
-
-/*
- * The idea is to set a period in which each task runs once.
- *
- * When there are too many tasks (sched_nr_latency) we have to stretch
- * this period because otherwise the slices get too small.
- *
- * p = (nr <= nl) ? l : l*nr/nl
- */
-static u64 __sched_period(unsigned long nr_running)
-{
-       if (unlikely(nr_running > sched_nr_latency))
-               return nr_running * sysctl_sched_min_granularity;
-       else
-               return sysctl_sched_latency;
-}
-
-static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq);
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 /*
- * We calculate the wall-time slice from the period by taking a part
- * proportional to the weight.
- *
- * s = p*P[w/rw]
+ * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i;
+ * this is probably good enough.
  */
-static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-       unsigned int nr_running = cfs_rq->nr_running;
-       struct sched_entity *init_se = se;
-       unsigned int min_gran;
-       u64 slice;
-
-       if (sched_feat(ALT_PERIOD))
-               nr_running = rq_of(cfs_rq)->cfs.h_nr_running;
-
-       slice = __sched_period(nr_running + !se->on_rq);
-
-       for_each_sched_entity(se) {
-               struct load_weight *load;
-               struct load_weight lw;
-               struct cfs_rq *qcfs_rq;
-
-               qcfs_rq = cfs_rq_of(se);
-               load = &qcfs_rq->load;
-
-               if (unlikely(!se->on_rq)) {
-                       lw = qcfs_rq->load;
+       if ((s64)(se->vruntime - se->deadline) < 0)
+               return;
 
-                       update_load_add(&lw, se->load.weight);
-                       load = &lw;
-               }
-               slice = __calc_delta(slice, se->load.weight, load);
-       }
+       /*
+        * For EEVDF the virtual time slope is determined by w_i (iow.
+        * nice) while the request time r_i is determined by
+        * sysctl_sched_base_slice.
+        */
+       se->slice = sysctl_sched_base_slice;
 
-       if (sched_feat(BASE_SLICE)) {
-               if (se_is_idle(init_se) && !sched_idle_cfs_rq(cfs_rq))
-                       min_gran = sysctl_sched_idle_min_granularity;
-               else
-                       min_gran = sysctl_sched_min_granularity;
+       /*
+        * EEVDF: vd_i = ve_i + r_i / w_i
+        */
+       se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
 
-               slice = max_t(u64, slice, min_gran);
+       /*
+        * The task has consumed its request, reschedule.
+        */
+       if (cfs_rq->nr_running > 1) {
+               resched_curr(rq_of(cfs_rq));
+               clear_buddies(cfs_rq, se);
        }
-
-       return slice;
-}
-
-/*
- * We calculate the vruntime slice of a to-be-inserted task.
- *
- * vs = s/w
- */
-static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-       return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
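
As an aside, not part of the patch: the deadline rule vd_i = ve_i + r_i/w_i in userspace numbers. Weights are in NICE_0_LOAD units and the request is the 750 us base slice from above; doubling the weight halves the virtual slice, so the heavier task's deadline recurs twice as often in virtual time.

#include <stdio.h>
#include <stdint.h>

#define NICE_0_LOAD 1024ULL

/* delta /= w, as calc_delta_fair() does for the common case */
static uint64_t vdelta(uint64_t delta, uint64_t weight)
{
	return delta * NICE_0_LOAD / weight;
}

int main(void)
{
	uint64_t base_slice = 750000ULL;	/* sysctl_sched_base_slice */
	uint64_t ve = 0;			/* current vruntime */

	/* prints vd = 750000 and vd = 375000 */
	printf("weight 1024: vd = %llu\n",
	       (unsigned long long)(ve + vdelta(base_slice, 1024)));
	printf("weight 2048: vd = %llu\n",
	       (unsigned long long)(ve + vdelta(base_slice, 2048)));
	return 0;
}
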
 
 #include "pelt.h"
@@ -921,6 +1122,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
        schedstat_add(cfs_rq->exec_clock, delta_exec);
 
        curr->vruntime += calc_delta_fair(delta_exec, curr);
+       update_deadline(cfs_rq, curr);
        update_min_vruntime(cfs_rq);
 
        if (entity_is_task(curr)) {
@@ -3375,16 +3577,36 @@ dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
 static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
                            unsigned long weight)
 {
+       unsigned long old_weight = se->load.weight;
+
        if (se->on_rq) {
                /* commit outstanding execution time */
                if (cfs_rq->curr == se)
                        update_curr(cfs_rq);
+               else
+                       avg_vruntime_sub(cfs_rq, se);
                update_load_sub(&cfs_rq->load, se->load.weight);
        }
        dequeue_load_avg(cfs_rq, se);
 
        update_load_set(&se->load, weight);
 
+       if (!se->on_rq) {
+               /*
+                * Because we keep se->vlag = V - v_i, while: lag_i = w_i*(V - v_i),
+                * we need to scale se->vlag when w_i changes.
+                */
+               se->vlag = div_s64(se->vlag * old_weight, weight);
+       } else {
+               s64 deadline = se->deadline - se->vruntime;
+               /*
+                * When the weight changes, the virtual time slope changes and
+                * we should adjust the relative virtual deadline accordingly.
+                */
+               deadline = div_s64(deadline * old_weight, weight);
+               se->deadline = se->vruntime + deadline;
+       }
+
 #ifdef CONFIG_SMP
        do {
                u32 divider = get_pelt_divider(&se->avg);
@@ -3394,9 +3616,11 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 #endif
 
        enqueue_load_avg(cfs_rq, se);
-       if (se->on_rq)
+       if (se->on_rq) {
                update_load_add(&cfs_rq->load, se->load.weight);
-
+               if (cfs_rq->curr != se)
+                       avg_vruntime_add(cfs_rq, se);
+       }
 }
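
An illustration, not part of the patch, of the two rescalings above with invented numbers: vlag stores V - v_i while the conserved quantity is the weighted lag w_i*(V - v_i), so a reweight scales vlag by old_weight/weight; the relative virtual deadline scales the same way because the virtual-time slope is 1/w_i.

#include <stdio.h>

int main(void)
{
	long old_weight = 1024, weight = 2048;
	long vlag = 40;			/* V - v_i before the reweight */
	long rel_deadline = 800;	/* vd_i - ve_i before the reweight */

	/* weighted lag is conserved: 1024 * 40 == 2048 * 20 */
	vlag = vlag * old_weight / weight;

	/* same request r_i at twice the weight: half the virtual span */
	rel_deadline = rel_deadline * old_weight / weight;

	/* prints: vlag 20, relative deadline 400 */
	printf("vlag %ld, relative deadline %ld\n", vlag, rel_deadline);
	return 0;
}
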
 
 void reweight_task(struct task_struct *p, int prio)
@@ -4692,159 +4916,125 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
 
 #endif /* CONFIG_SMP */
 
-static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-#ifdef CONFIG_SCHED_DEBUG
-       s64 d = se->vruntime - cfs_rq->min_vruntime;
-
-       if (d < 0)
-               d = -d;
-
-       if (d > 3*sysctl_sched_latency)
-               schedstat_inc(cfs_rq->nr_spread_over);
-#endif
-}
-
-static inline bool entity_is_long_sleeper(struct sched_entity *se)
+static void
+place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-       struct cfs_rq *cfs_rq;
-       u64 sleep_time;
-
-       if (se->exec_start == 0)
-               return false;
-
-       cfs_rq = cfs_rq_of(se);
+       u64 vslice = calc_delta_fair(se->slice, se);
+       u64 vruntime = avg_vruntime(cfs_rq);
+       s64 lag = 0;
 
-       sleep_time = rq_clock_task(rq_of(cfs_rq));
+       /*
+        * Due to how V is constructed as the weighted average of entities,
+        * adding tasks with positive lag, or removing tasks with negative lag,
+        * will move 'time' backwards; this can screw around with the lag of
+        * other tasks.
+        *
+        * EEVDF: placement strategy #1 / #2
+        */
+       if (sched_feat(PLACE_LAG) && cfs_rq->nr_running) {
+               struct sched_entity *curr = cfs_rq->curr;
+               unsigned long load;
 
-       /* Happen while migrating because of clock task divergence */
-       if (sleep_time <= se->exec_start)
-               return false;
+               lag = se->vlag;
 
-       sleep_time -= se->exec_start;
-       if (sleep_time > ((1ULL << 63) / scale_load_down(NICE_0_LOAD)))
-               return true;
+               /*
+                * If we want to place a task and preserve lag, we have to
+                * consider the effect of the new entity on the weighted
+                * average and compensate for this, otherwise lag can quickly
+                * evaporate.
+                *
+                * Lag is defined as:
+                *
+                *   lag_i = S - s_i = w_i * (V - v_i)
+                *
+                * To avoid the 'w_i' term all over the place, we only track
+                * the virtual lag:
+                *
+                *   vl_i = V - v_i <=> v_i = V - vl_i
+                *
+                * And we take V to be the weighted average of all v:
+                *
+                *   V = (\Sum w_j*v_j) / W
+                *
+                * Where W is: \Sum w_j
+                *
+                * Then, the weighted average after adding an entity with lag
+                * vl_i is given by:
+                *
+                *   V' = (\Sum w_j*v_j + w_i*v_i) / (W + w_i)
+                *      = (W*V + w_i*(V - vl_i)) / (W + w_i)
+                *      = (W*V + w_i*V - w_i*vl_i) / (W + w_i)
+                *      = (V*(W + w_i) - w_i*vl_i) / (W + w_i)
+                *      = V - w_i*vl_i / (W + w_i)
+                *
+                * And the actual lag after adding an entity with vl_i is:
+                *
+                *   vl'_i = V' - v_i
+                *         = V - w_i*vl_i / (W + w_i) - (V - vl_i)
+                *         = vl_i - w_i*vl_i / (W + w_i)
+                *
+                * Which is strictly less than vl_i. So in order to preserve lag
+                * we should inflate the lag before placement such that the
+                * effective lag after placement comes out right.
+                *
+                * As such, invert the above relation for vl'_i to get the vl_i
+                * we need to use such that the lag after placement is the lag
+                * we computed before dequeue.
+                *
+                *   vl'_i = vl_i - w_i*vl_i / (W + w_i)
+                *         = ((W + w_i)*vl_i - w_i*vl_i) / (W + w_i)
+                *
+                *   (W + w_i)*vl'_i = (W + w_i)*vl_i - w_i*vl_i
+                *                   = W*vl_i
+                *
+                *   vl_i = (W + w_i)*vl'_i / W
+                */
+               load = cfs_rq->avg_load;
+               if (curr && curr->on_rq)
+                       load += scale_load_down(curr->load.weight);
 
-       return false;
-}
+               lag *= load + scale_load_down(se->load.weight);
+               if (WARN_ON_ONCE(!load))
+                       load = 1;
+               lag = div_s64(lag, load);
+       }
 
-static void
-place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
-{
-       u64 vruntime = cfs_rq->min_vruntime;
+       se->vruntime = vruntime - lag;
 
        /*
-        * The 'current' period is already promised to the current tasks,
-        * however the extra weight of the new task will slow them down a
-        * little, place the new task so that it fits in the slot that
-        * stays open at the end.
+        * When joining the competition, the existing tasks will be,
+        * on average, halfway through their slice; as such, start tasks
+        * off with half a slice to ease into the competition.
         */
-       if (initial && sched_feat(START_DEBIT))
-               vruntime += sched_vslice(cfs_rq, se);
-
-       /* sleeps up to a single latency don't count. */
-       if (!initial) {
-               unsigned long thresh;
+       if (sched_feat(PLACE_DEADLINE_INITIAL) && (flags & ENQUEUE_INITIAL))
+               vslice /= 2;
 
-               if (se_is_idle(se))
-                       thresh = sysctl_sched_min_granularity;
-               else
-                       thresh = sysctl_sched_latency;
-
-               /*
-                * Halve their sleep time's effect, to allow
-                * for a gentler effect of sleepers:
-                */
-               if (sched_feat(GENTLE_FAIR_SLEEPERS))
-                       thresh >>= 1;
-
-               vruntime -= thresh;
-       }
-
-       /*
-        * Pull vruntime of the entity being placed to the base level of
-        * cfs_rq, to prevent boosting it if placed backwards.
-        * However, min_vruntime can advance much faster than real time, with
-        * the extreme being when an entity with the minimal weight always runs
-        * on the cfs_rq. If the waking entity slept for a long time, its
-        * vruntime difference from min_vruntime may overflow s64 and their
-        * comparison may get inversed, so ignore the entity's original
-        * vruntime in that case.
-        * The maximal vruntime speedup is given by the ratio of normal to
-        * minimal weight: scale_load_down(NICE_0_LOAD) / MIN_SHARES.
-        * When placing a migrated waking entity, its exec_start has been set
-        * from a different rq. In order to take into account a possible
-        * divergence between new and prev rq's clocks task because of irq and
-        * stolen time, we take an additional margin.
-        * So, cutting off on the sleep time of
-        *     2^63 / scale_load_down(NICE_0_LOAD) ~ 104 days
-        * should be safe.
-        */
-       if (entity_is_long_sleeper(se))
-               se->vruntime = vruntime;
-       else
-               se->vruntime = max_vruntime(se->vruntime, vruntime);
+       /*
+        * EEVDF: vd_i = ve_i + r_i/w_i
+        */
+       se->deadline = se->vruntime + vslice;
 }
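
As an aside, not part of the patch: the placement algebra above, checked numerically with invented weights. Inflating the dequeue-time lag by (W + w_i)/W before placement makes the post-placement lag come out to exactly the value we stored.

#include <stdio.h>

int main(void)
{
	long W = 3072;		/* \Sum w_j of already-queued entities */
	long w_i = 1024;	/* weight of the entity being placed */
	long vlag = -24;	/* vl'_i we want to see after placement */

	/* vl_i = (W + w_i) * vl'_i / W */
	long scaled = (W + w_i) * vlag / W;

	/* lag after adding an entity with lag vl_i:
	 * vl'_i = vl_i - w_i*vl_i / (W + w_i) */
	long after = scaled - w_i * scaled / (W + w_i);

	/* prints: scaled lag -32 places back to -24 (wanted -24) */
	printf("scaled lag %ld places back to %ld (wanted %ld)\n",
	       scaled, after, vlag);
	return 0;
}
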
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
+static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq);
 
 static inline bool cfs_bandwidth_used(void);
 
-/*
- * MIGRATION
- *
- *     dequeue
- *       update_curr()
- *         update_min_vruntime()
- *       vruntime -= min_vruntime
- *
- *     enqueue
- *       update_curr()
- *         update_min_vruntime()
- *       vruntime += min_vruntime
- *
- * this way the vruntime transition between RQs is done when both
- * min_vruntime are up-to-date.
- *
- * WAKEUP (remote)
- *
- *     ->migrate_task_rq_fair() (p->state == TASK_WAKING)
- *       vruntime -= min_vruntime
- *
- *     enqueue
- *       update_curr()
- *         update_min_vruntime()
- *       vruntime += min_vruntime
- *
- * this way we don't have the most up-to-date min_vruntime on the originating
- * CPU and an up-to-date min_vruntime on the destination CPU.
- */
-
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
-       bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
        bool curr = cfs_rq->curr == se;
 
        /*
         * If we're the current task, we must renormalise before calling
         * update_curr().
         */
-       if (renorm && curr)
-               se->vruntime += cfs_rq->min_vruntime;
+       if (curr)
+               place_entity(cfs_rq, se, flags);
 
        update_curr(cfs_rq);
 
        /*
-        * Otherwise, renormalise after, such that we're placed at the current
-        * moment in time, instead of some random moment in the past. Being
-        * placed in the past could significantly boost this task to the
-        * fairness detriment of existing tasks.
-        */
-       if (renorm && !curr)
-               se->vruntime += cfs_rq->min_vruntime;
-
-       /*
         * When enqueuing a sched_entity, we must:
         *   - Update loads to have both entity and cfs_rq synced with now.
         *   - For group_entity, update its runnable_weight to reflect the new
@@ -4855,37 +5045,46 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         */
        update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
        se_update_runnable(se);
+       /*
+        * XXX update_load_avg() above will have attached us to the pelt sum;
+        * but update_cfs_group() here will re-adjust the weight and have to
+        * undo/redo all that. Seems wasteful.
+        */
        update_cfs_group(se);
+
+       /*
+        * XXX now that the entity has been re-weighted, and its lag adjusted,
+        * we can place the entity.
+        */
+       if (!curr)
+               place_entity(cfs_rq, se, flags);
+
        account_entity_enqueue(cfs_rq, se);
 
-       if (flags & ENQUEUE_WAKEUP)
-               place_entity(cfs_rq, se, 0);
        /* Entity has migrated, no longer consider this task hot */
        if (flags & ENQUEUE_MIGRATED)
                se->exec_start = 0;
 
        check_schedstat_required();
        update_stats_enqueue_fair(cfs_rq, se, flags);
-       check_spread(cfs_rq, se);
        if (!curr)
                __enqueue_entity(cfs_rq, se);
        se->on_rq = 1;
 
        if (cfs_rq->nr_running == 1) {
                check_enqueue_throttle(cfs_rq);
-               if (!throttled_hierarchy(cfs_rq))
+               if (!throttled_hierarchy(cfs_rq)) {
                        list_add_leaf_cfs_rq(cfs_rq);
-       }
-}
-
-static void __clear_buddies_last(struct sched_entity *se)
-{
-       for_each_sched_entity(se) {
-               struct cfs_rq *cfs_rq = cfs_rq_of(se);
-               if (cfs_rq->last != se)
-                       break;
+               } else {
+#ifdef CONFIG_CFS_BANDWIDTH
+                       struct rq *rq = rq_of(cfs_rq);
 
-               cfs_rq->last = NULL;
+                       if (cfs_rq_throttled(cfs_rq) && !cfs_rq->throttled_clock)
+                               cfs_rq->throttled_clock = rq_clock(rq);
+                       if (!cfs_rq->throttled_clock_self)
+                               cfs_rq->throttled_clock_self = rq_clock(rq);
+#endif
+               }
        }
 }
 
@@ -4900,27 +5099,10 @@ static void __clear_buddies_next(struct sched_entity *se)
        }
 }
 
-static void __clear_buddies_skip(struct sched_entity *se)
-{
-       for_each_sched_entity(se) {
-               struct cfs_rq *cfs_rq = cfs_rq_of(se);
-               if (cfs_rq->skip != se)
-                       break;
-
-               cfs_rq->skip = NULL;
-       }
-}
-
 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-       if (cfs_rq->last == se)
-               __clear_buddies_last(se);
-
        if (cfs_rq->next == se)
                __clear_buddies_next(se);
-
-       if (cfs_rq->skip == se)
-               __clear_buddies_skip(se);
 }
 
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -4954,82 +5136,28 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 
        clear_buddies(cfs_rq, se);
 
+       update_entity_lag(cfs_rq, se);
        if (se != cfs_rq->curr)
                __dequeue_entity(cfs_rq, se);
        se->on_rq = 0;
-       account_entity_dequeue(cfs_rq, se);
-
-       /*
-        * Normalize after update_curr(); which will also have moved
-        * min_vruntime if @se is the one holding it back. But before doing
-        * update_min_vruntime() again, which will discount @se's position and
-        * can move min_vruntime forward still more.
-        */
-       if (!(flags & DEQUEUE_SLEEP))
-               se->vruntime -= cfs_rq->min_vruntime;
-
-       /* return excess runtime on last dequeue */
-       return_cfs_rq_runtime(cfs_rq);
-
-       update_cfs_group(se);
-
-       /*
-        * Now advance min_vruntime if @se was the entity holding it back,
-        * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
-        * put back on, and if we advance min_vruntime, we'll be placed back
-        * further than we started -- ie. we'll be penalized.
-        */
-       if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
-               update_min_vruntime(cfs_rq);
-
-       if (cfs_rq->nr_running == 0)
-               update_idle_cfs_rq_clock_pelt(cfs_rq);
-}
-
-/*
- * Preempt the current task with a newly woken task if needed:
- */
-static void
-check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
-{
-       unsigned long ideal_runtime, delta_exec;
-       struct sched_entity *se;
-       s64 delta;
+       account_entity_dequeue(cfs_rq, se);
 
-       /*
-        * When many tasks blow up the sched_period; it is possible that
-        * sched_slice() reports unusually large results (when many tasks are
-        * very light for example). Therefore impose a maximum.
-        */
-       ideal_runtime = min_t(u64, sched_slice(cfs_rq, curr), sysctl_sched_latency);
+       /* return excess runtime on last dequeue */
+       return_cfs_rq_runtime(cfs_rq);
 
-       delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-       if (delta_exec > ideal_runtime) {
-               resched_curr(rq_of(cfs_rq));
-               /*
-                * The current task ran long enough, ensure it doesn't get
-                * re-elected due to buddy favours.
-                */
-               clear_buddies(cfs_rq, curr);
-               return;
-       }
+       update_cfs_group(se);
 
        /*
-        * Ensure that a task that missed wakeup preemption by a
-        * narrow margin doesn't have to wait for a full slice.
-        * This also mitigates buddy induced latencies under load.
+        * Now advance min_vruntime if @se was the entity holding it back,
+        * except when: DEQUEUE_SAVE && !DEQUEUE_MOVE, in this case we'll be
+        * put back on, and if we advance min_vruntime, we'll be placed back
+        * further than we started -- ie. we'll be penalized.
         */
-       if (delta_exec < sysctl_sched_min_granularity)
-               return;
-
-       se = __pick_first_entity(cfs_rq);
-       delta = curr->vruntime - se->vruntime;
-
-       if (delta < 0)
-               return;
+       if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
+               update_min_vruntime(cfs_rq);
 
-       if (delta > ideal_runtime)
-               resched_curr(rq_of(cfs_rq));
+       if (cfs_rq->nr_running == 0)
+               update_idle_cfs_rq_clock_pelt(cfs_rq);
 }
 
 static void
@@ -5047,6 +5175,11 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                update_stats_wait_end_fair(cfs_rq, se);
                __dequeue_entity(cfs_rq, se);
                update_load_avg(cfs_rq, se, UPDATE_TG);
+               /*
+                * HACK, stash a copy of deadline at the point of pick in vlag,
+                * which isn't used until dequeue.
+                */
+               se->vlag = se->deadline;
        }
 
        update_stats_curr_start(cfs_rq, se);
@@ -5070,9 +5203,6 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
        se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
-
 /*
  * Pick the next process, keeping these things in mind, in this order:
  * 1) keep things fair between processes/task groups
@@ -5083,50 +5213,14 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 static struct sched_entity *
 pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-       struct sched_entity *left = __pick_first_entity(cfs_rq);
-       struct sched_entity *se;
-
-       /*
-        * If curr is set we have to see if its left of the leftmost entity
-        * still in the tree, provided there was anything in the tree at all.
-        */
-       if (!left || (curr && entity_before(curr, left)))
-               left = curr;
-
-       se = left; /* ideally we run the leftmost entity */
-
        /*
-        * Avoid running the skip buddy, if running something else can
-        * be done without getting too unfair.
+        * Enabling NEXT_BUDDY will affect latency but not fairness.
         */
-       if (cfs_rq->skip && cfs_rq->skip == se) {
-               struct sched_entity *second;
-
-               if (se == curr) {
-                       second = __pick_first_entity(cfs_rq);
-               } else {
-                       second = __pick_next_entity(se);
-                       if (!second || (curr && entity_before(curr, second)))
-                               second = curr;
-               }
-
-               if (second && wakeup_preempt_entity(second, left) < 1)
-                       se = second;
-       }
+       if (sched_feat(NEXT_BUDDY) &&
+           cfs_rq->next && entity_eligible(cfs_rq, cfs_rq->next))
+               return cfs_rq->next;
 
-       if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) {
-               /*
-                * Someone really wants this to run. If it's not unfair, run it.
-                */
-               se = cfs_rq->next;
-       } else if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) {
-               /*
-                * Prefer last buddy, try to return the CPU to a preempted task.
-                */
-               se = cfs_rq->last;
-       }
-
-       return se;
+       return pick_eevdf(cfs_rq);
 }
 
 static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
@@ -5143,8 +5237,6 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
        /* throttle cfs_rqs exceeding runtime */
        check_cfs_rq_runtime(cfs_rq);
 
-       check_spread(cfs_rq, prev);
-
        if (prev->on_rq) {
                update_stats_wait_start_fair(cfs_rq, prev);
                /* Put 'current' back into the tree. */
@@ -5185,9 +5277,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
                        hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
                return;
 #endif
-
-       if (cfs_rq->nr_running > 1)
-               check_preempt_tick(cfs_rq, curr);
 }
 
 
@@ -5377,6 +5466,17 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
                /* Add cfs_rq with load or one or more already running entities to the list */
                if (!cfs_rq_is_decayed(cfs_rq))
                        list_add_leaf_cfs_rq(cfs_rq);
+
+               if (cfs_rq->throttled_clock_self) {
+                       u64 delta = rq_clock(rq) - cfs_rq->throttled_clock_self;
+
+                       cfs_rq->throttled_clock_self = 0;
+
+                       if (SCHED_WARN_ON((s64)delta < 0))
+                               delta = 0;
+
+                       cfs_rq->throttled_clock_self_time += delta;
+               }
        }
 
        return 0;
@@ -5391,6 +5491,10 @@ static int tg_throttle_down(struct task_group *tg, void *data)
        if (!cfs_rq->throttle_count) {
                cfs_rq->throttled_clock_pelt = rq_clock_pelt(rq);
                list_del_leaf_cfs_rq(cfs_rq);
+
+               SCHED_WARN_ON(cfs_rq->throttled_clock_self);
+               if (cfs_rq->nr_running)
+                       cfs_rq->throttled_clock_self = rq_clock(rq);
        }
        cfs_rq->throttle_count++;
 
@@ -5480,7 +5584,9 @@ done:
         * throttled-list.  rq->lock protects completion.
         */
        cfs_rq->throttled = 1;
-       cfs_rq->throttled_clock = rq_clock(rq);
+       SCHED_WARN_ON(cfs_rq->throttled_clock);
+       if (cfs_rq->nr_running)
+               cfs_rq->throttled_clock = rq_clock(rq);
        return true;
 }
 
@@ -5498,7 +5604,10 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
        update_rq_clock(rq);
 
        raw_spin_lock(&cfs_b->lock);
-       cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+       if (cfs_rq->throttled_clock) {
+               cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
+               cfs_rq->throttled_clock = 0;
+       }
        list_del_rcu(&cfs_rq->throttled_list);
        raw_spin_unlock(&cfs_b->lock);
 
@@ -6014,13 +6123,14 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
        return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
 }
 
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent)
 {
        raw_spin_lock_init(&cfs_b->lock);
        cfs_b->runtime = 0;
        cfs_b->quota = RUNTIME_INF;
        cfs_b->period = ns_to_ktime(default_cfs_period());
        cfs_b->burst = 0;
+       cfs_b->hierarchical_quota = parent ? parent->hierarchical_quota : RUNTIME_INF;
 
        INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
        hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
@@ -6157,6 +6267,46 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
        rq_clock_stop_loop_update(rq);
 }
 
+bool cfs_task_bw_constrained(struct task_struct *p)
+{
+       struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+       if (!cfs_bandwidth_used())
+               return false;
+
+       if (cfs_rq->runtime_enabled ||
+           tg_cfs_bandwidth(cfs_rq->tg)->hierarchical_quota != RUNTIME_INF)
+               return true;
+
+       return false;
+}
+
+#ifdef CONFIG_NO_HZ_FULL
+/* called from pick_next_task_fair() */
+static void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p)
+{
+       int cpu = cpu_of(rq);
+
+       if (!sched_feat(HZ_BW) || !cfs_bandwidth_used())
+               return;
+
+       if (!tick_nohz_full_cpu(cpu))
+               return;
+
+       if (rq->nr_running != 1)
+               return;
+
+       /*
+        * We know there is only one task runnable and we've just picked it. The
+        * normal enqueue path will have cleared TICK_DEP_BIT_SCHED if we would
+        * otherwise be able to stop the tick. We just need to check whether we
+        * are using bandwidth control.
+        */
+       if (cfs_task_bw_constrained(p))
+               tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
+}
+#endif
+
 #else /* CONFIG_CFS_BANDWIDTH */
 
 static inline bool cfs_bandwidth_used(void)
@@ -6186,9 +6336,8 @@ static inline int throttled_lb_pair(struct task_group *tg,
        return 0;
 }
 
-void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
+void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent) {}
 static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
 #endif
 
@@ -6199,9 +6348,18 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
 static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
 static inline void update_runtime_enabled(struct rq *rq) {}
 static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
-
+#ifdef CONFIG_CGROUP_SCHED
+bool cfs_task_bw_constrained(struct task_struct *p)
+{
+       return false;
+}
+#endif
 #endif /* CONFIG_CFS_BANDWIDTH */
 
+#if !defined(CONFIG_CFS_BANDWIDTH) || !defined(CONFIG_NO_HZ_FULL)
+static inline void sched_fair_update_stop_tick(struct rq *rq, struct task_struct *p) {}
+#endif
+
 /**************************************************
  * CFS operations on tasks:
  */
@@ -6210,13 +6368,12 @@ static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
 static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
        struct sched_entity *se = &p->se;
-       struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
        SCHED_WARN_ON(task_rq(p) != rq);
 
        if (rq->cfs.h_nr_running > 1) {
-               u64 slice = sched_slice(cfs_rq, se);
                u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+               u64 slice = se->slice;
                s64 delta = slice - ran;
 
                if (delta < 0) {
@@ -6240,8 +6397,7 @@ static void hrtick_update(struct rq *rq)
        if (!hrtick_enabled_fair(rq) || curr->sched_class != &fair_sched_class)
                return;
 
-       if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
-               hrtick_start_fair(rq, curr);
+       hrtick_start_fair(rq, curr);
 }
 #else /* !CONFIG_SCHED_HRTICK */
 static inline void
@@ -6282,17 +6438,6 @@ static int sched_idle_rq(struct rq *rq)
                        rq->nr_running);
 }
 
-/*
- * Returns true if cfs_rq only has SCHED_IDLE entities enqueued. Note the use
- * of idle_nr_running, which does not consider idle descendants of normal
- * entities.
- */
-static bool sched_idle_cfs_rq(struct cfs_rq *cfs_rq)
-{
-       return cfs_rq->nr_running &&
-               cfs_rq->nr_running == cfs_rq->idle_nr_running;
-}
-
 #ifdef CONFIG_SMP
 static int sched_idle_cpu(int cpu)
 {
@@ -7065,7 +7210,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
        util_min = uclamp_eff_value(p, UCLAMP_MIN);
        util_max = uclamp_eff_value(p, UCLAMP_MAX);
 
-       for_each_cpu_wrap(cpu, cpus, target + 1) {
+       for_each_cpu_wrap(cpu, cpus, target) {
                unsigned long cpu_cap = capacity_of(cpu);
 
                if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
@@ -7174,7 +7319,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
            recent_used_cpu != target &&
            cpus_share_cache(recent_used_cpu, target) &&
            (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
-           cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
+           cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) &&
            asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
                return recent_used_cpu;
        }
@@ -7289,9 +7434,6 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
 
                util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
 
-               if (boost)
-                       util_est = max(util_est, runnable);
-
                /*
                 * During wake-up @p isn't enqueued yet and doesn't contribute
                 * to any cpu_rq(cpu)->cfs.avg.util_est.enqueued.
@@ -7741,6 +7883,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
        if (wake_flags & WF_TTWU) {
                record_wakee(p);
 
+               if ((wake_flags & WF_CURRENT_CPU) &&
+                   cpumask_test_cpu(cpu, p->cpus_ptr))
+                       return cpu;
+
                if (sched_energy_enabled()) {
                        new_cpu = find_energy_efficient_cpu(p, prev_cpu);
                        if (new_cpu >= 0)
@@ -7798,18 +7944,6 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
 {
        struct sched_entity *se = &p->se;
 
-       /*
-        * As blocked tasks retain absolute vruntime the migration needs to
-        * deal with this by subtracting the old and adding the new
-        * min_vruntime -- the latter is done by enqueue_entity() when placing
-        * the task on the new runqueue.
-        */
-       if (READ_ONCE(p->__state) == TASK_WAKING) {
-               struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-               se->vruntime -= u64_u32_load(cfs_rq->min_vruntime);
-       }
-
        if (!task_on_rq_migrating(p)) {
                remove_entity_load_avg(se);
 
@@ -7847,66 +7981,6 @@ balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 }
 #endif /* CONFIG_SMP */
 
-static unsigned long wakeup_gran(struct sched_entity *se)
-{
-       unsigned long gran = sysctl_sched_wakeup_granularity;
-
-       /*
-        * Since its curr running now, convert the gran from real-time
-        * to virtual-time in his units.
-        *
-        * By using 'se' instead of 'curr' we penalize light tasks, so
-        * they get preempted easier. That is, if 'se' < 'curr' then
-        * the resulting gran will be larger, therefore penalizing the
-        * lighter, if otoh 'se' > 'curr' then the resulting gran will
-        * be smaller, again penalizing the lighter task.
-        *
-        * This is especially important for buddies when the leftmost
-        * task is higher priority than the buddy.
-        */
-       return calc_delta_fair(gran, se);
-}
-
-/*
- * Should 'se' preempt 'curr'.
- *
- *             |s1
- *        |s2
- *   |s3
- *         g
- *      |<--->|c
- *
- *  w(c, s1) = -1
- *  w(c, s2) =  0
- *  w(c, s3) =  1
- *
- */
-static int
-wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
-{
-       s64 gran, vdiff = curr->vruntime - se->vruntime;
-
-       if (vdiff <= 0)
-               return -1;
-
-       gran = wakeup_gran(se);
-       if (vdiff > gran)
-               return 1;
-
-       return 0;
-}
-
-static void set_last_buddy(struct sched_entity *se)
-{
-       for_each_sched_entity(se) {
-               if (SCHED_WARN_ON(!se->on_rq))
-                       return;
-               if (se_is_idle(se))
-                       return;
-               cfs_rq_of(se)->last = se;
-       }
-}
-
 static void set_next_buddy(struct sched_entity *se)
 {
        for_each_sched_entity(se) {
@@ -7918,12 +7992,6 @@ static void set_next_buddy(struct sched_entity *se)
        }
 }
 
-static void set_skip_buddy(struct sched_entity *se)
-{
-       for_each_sched_entity(se)
-               cfs_rq_of(se)->skip = se;
-}
-
 /*
  * Preempt the current task with a newly woken task if needed:
  */
@@ -7932,7 +8000,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
        struct task_struct *curr = rq->curr;
        struct sched_entity *se = &curr->se, *pse = &p->se;
        struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-       int scale = cfs_rq->nr_running >= sched_nr_latency;
        int next_buddy_marked = 0;
        int cse_is_idle, pse_is_idle;
 
@@ -7948,7 +8015,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
        if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
                return;
 
-       if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
+       if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) {
                set_next_buddy(pse);
                next_buddy_marked = 1;
        }
@@ -7993,35 +8060,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
        if (cse_is_idle != pse_is_idle)
                return;
 
-       update_curr(cfs_rq_of(se));
-       if (wakeup_preempt_entity(se, pse) == 1) {
-               /*
-                * Bias pick_next to pick the sched entity that is
-                * triggering this preemption.
-                */
-               if (!next_buddy_marked)
-                       set_next_buddy(pse);
+       cfs_rq = cfs_rq_of(se);
+       update_curr(cfs_rq);
+
+       /*
+        * XXX pick_eevdf(cfs_rq) != se ?
+        */
+       if (pick_eevdf(cfs_rq) == pse)
                goto preempt;
-       }
 
        return;
 
 preempt:
        resched_curr(rq);
-       /*
-        * Only set the backward buddy when the current task is still
-        * on the rq. This can happen when a wakeup gets interleaved
-        * with schedule on the ->pre_schedule() or idle_balance()
-        * point, either of which can * drop the rq lock.
-        *
-        * Also, during early boot the idle thread is in the fair class,
-        * for obvious reasons its a bad idea to schedule back to it.
-        */
-       if (unlikely(!se->on_rq || curr == rq->idle))
-               return;
-
-       if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
-               set_last_buddy(se);
 }
 
 #ifdef CONFIG_SMP
@@ -8172,6 +8223,7 @@ done: __maybe_unused;
                hrtick_start_fair(rq, p);
 
        update_misfit_status(p, rq);
+       sched_fair_update_stop_tick(rq, p);
 
        return p;
 
@@ -8222,8 +8274,6 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
 
 /*
  * sched_yield() is very simple
- *
- * The magic of dealing with the ->skip buddy is in pick_next_entity.
  */
 static void yield_task_fair(struct rq *rq)
 {
@@ -8239,21 +8289,19 @@ static void yield_task_fair(struct rq *rq)
 
        clear_buddies(cfs_rq, se);
 
-       if (curr->policy != SCHED_BATCH) {
-               update_rq_clock(rq);
-               /*
-                * Update run-time statistics of the 'current'.
-                */
-               update_curr(cfs_rq);
-               /*
-                * Tell update_rq_clock() that we've just updated,
-                * so we don't do microscopic update in schedule()
-                * and double the fastpath cost.
-                */
-               rq_clock_skip_update(rq);
-       }
+       update_rq_clock(rq);
+       /*
+        * Update run-time statistics of the 'current'.
+        */
+       update_curr(cfs_rq);
+       /*
+        * Tell update_rq_clock() that we've just updated,
+        * so we don't do microscopic update in schedule()
+        * and double the fastpath cost.
+        */
+       rq_clock_skip_update(rq);
 
-       set_skip_buddy(se);
+       se->deadline += calc_delta_fair(se->slice, se);
 }
 
 static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
@@ -8416,6 +8464,11 @@ enum group_type {
         */
        group_misfit_task,
        /*
+        * Balance SMT group that's fully busy. Can benefit from migrating
+        * a task on an SMT CPU with a busy sibling to another CPU on an
+        * idle core.
+        */
+       group_smt_balance,
+       /*
         * SD_ASYM_PACKING only: One local CPU with higher capacity is available,
         * and the task should be migrated to it instead of running on the
         * current CPU.
@@ -8496,8 +8549,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
         * Buddy candidates are cache hot:
         */
        if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running &&
-                       (&p->se == cfs_rq_of(&p->se)->next ||
-                        &p->se == cfs_rq_of(&p->se)->last))
+           (&p->se == cfs_rq_of(&p->se)->next))
                return 1;
 
        if (sysctl_sched_migration_cost == -1)
@@ -9123,6 +9175,7 @@ struct sg_lb_stats {
        unsigned int group_weight;
        enum group_type group_type;
        unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
+       unsigned int group_smt_balance;  /* Task on busy SMT should be moved */
        unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
 #ifdef CONFIG_NUMA_BALANCING
        unsigned int nr_numa_running;
@@ -9396,6 +9449,9 @@ group_type group_classify(unsigned int imbalance_pct,
        if (sgs->group_asym_packing)
                return group_asym_packing;
 
+       if (sgs->group_smt_balance)
+               return group_smt_balance;
+
        if (sgs->group_misfit_task_load)
                return group_misfit_task;
 
@@ -9465,6 +9521,71 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds,  struct sg_lb_stats *sgs
        return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu);
 }
 
+/* One group has more than one SMT CPU while the other group does not */
+static inline bool smt_vs_nonsmt_groups(struct sched_group *sg1,
+                                   struct sched_group *sg2)
+{
+       if (!sg1 || !sg2)
+               return false;
+
+       return (sg1->flags & SD_SHARE_CPUCAPACITY) !=
+               (sg2->flags & SD_SHARE_CPUCAPACITY);
+}
+
+static inline bool smt_balance(struct lb_env *env, struct sg_lb_stats *sgs,
+                              struct sched_group *group)
+{
+       if (env->idle == CPU_NOT_IDLE)
+               return false;
+
+       /*
+        * For an SMT source group, it is better to move a task
+        * to a CPU that doesn't have multiple tasks sharing its CPU capacity.
+        * Note that if a group has only a single SMT sibling,
+        * SD_SHARE_CPUCAPACITY will not be set.
+        */
+       if (group->flags & SD_SHARE_CPUCAPACITY &&
+           sgs->sum_h_nr_running > 1)
+               return true;
+
+       return false;
+}
+
+static inline long sibling_imbalance(struct lb_env *env,
+                                   struct sd_lb_stats *sds,
+                                   struct sg_lb_stats *busiest,
+                                   struct sg_lb_stats *local)
+{
+       int ncores_busiest, ncores_local;
+       long imbalance;
+
+       if (env->idle == CPU_NOT_IDLE || !busiest->sum_nr_running)
+               return 0;
+
+       ncores_busiest = sds->busiest->cores;
+       ncores_local = sds->local->cores;
+
+       if (ncores_busiest == ncores_local) {
+               imbalance = busiest->sum_nr_running;
+               lsub_positive(&imbalance, local->sum_nr_running);
+               return imbalance;
+       }
+
+       /* Balance such that the nr_running/ncores ratio is the same on both groups */
+       imbalance = ncores_local * busiest->sum_nr_running;
+       lsub_positive(&imbalance, ncores_busiest * local->sum_nr_running);
+       /* Normalize imbalance and do rounding on normalization */
+       imbalance = 2 * imbalance + ncores_local + ncores_busiest;
+       imbalance /= ncores_local + ncores_busiest;
+
+       /* Take advantage of the resources in an empty sched group */
+       if (imbalance == 0 && local->sum_nr_running == 0 &&
+           busiest->sum_nr_running > 1)
+               imbalance = 2;
+
+       return imbalance;
+}
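
An illustration, not part of the patch: the normalization above with invented core and task counts. Twice the cross-multiplied difference, divided by the total core count, generalizes busiest - local to unequal core counts (it equals B - L when the counts match), and adding ncores_local + ncores_busiest before the division biases the result upward so a real imbalance is never truncated to zero.

#include <stdio.h>

int main(void)
{
	long ncores_busiest = 4, ncores_local = 2;
	long busiest_running = 7, local_running = 2;
	long imbalance;

	/* nr_running/ncores is 7/4 on busiest vs 2/2 on local */
	imbalance = ncores_local * busiest_running
		  - ncores_busiest * local_running;	/* = 6 */

	imbalance = (2 * imbalance + ncores_local + ncores_busiest)
		  / (ncores_local + ncores_busiest);

	/* prints: imbalance = 3, in the same units as busiest - local */
	printf("imbalance = %ld\n", imbalance);
	return 0;
}
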
+
 static inline bool
 sched_reduced_capacity(struct rq *rq, struct sched_domain *sd)
 {
@@ -9557,6 +9678,10 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                sgs->group_asym_packing = 1;
        }
 
+       /* Check for a loaded SMT group to be balanced to the dst CPU */
+       if (!local_group && smt_balance(env, sgs, group))
+               sgs->group_smt_balance = 1;
+
        sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
 
        /* Computing avg_load makes sense only when group is overloaded */
@@ -9641,6 +9766,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
                        return false;
                break;
 
+       case group_smt_balance:
        case group_fully_busy:
                /*
                 * Select the fully busy group with highest avg_load. In
@@ -9670,6 +9796,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
        case group_has_spare:
                /*
+                * Do not pick an sg with SMT CPUs over an sg with non-SMT
+                * CPUs, as we do not want to pull a task off an SMT core
+                * that has only one task and leave the core idle.
+                */
+               if (smt_vs_nonsmt_groups(sds->busiest, sg)) {
+                       if (sg->flags & SD_SHARE_CPUCAPACITY && sgs->sum_h_nr_running <= 1)
+                               return false;
+                       else
+                               return true;
+               }
+
+               /*
                 * Select not overloaded group with lowest number of idle cpus
                 * and highest number of running tasks. We could also compare
                 * the spare capacity which is more stable but it can end up
@@ -9865,6 +10003,7 @@ static bool update_pick_idlest(struct sched_group *idlest,
 
        case group_imbalanced:
        case group_asym_packing:
+       case group_smt_balance:
                /* Those types are not used in the slow wakeup path */
                return false;
 
@@ -9996,6 +10135,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 
        case group_imbalanced:
        case group_asym_packing:
+       case group_smt_balance:
                /* Those types are not used in the slow wakeup path */
                return NULL;
 
@@ -10250,6 +10390,13 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
                return;
        }
 
+       if (busiest->group_type == group_smt_balance) {
+               /* Reduce number of tasks sharing CPU capacity */
+               env->migration_type = migrate_task;
+               env->imbalance = 1;
+               return;
+       }
+
        if (busiest->group_type == group_imbalanced) {
                /*
                 * In the group_imb case we cannot rely on group-wide averages
@@ -10297,14 +10444,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
                }
 
                if (busiest->group_weight == 1 || sds->prefer_sibling) {
-                       unsigned int nr_diff = busiest->sum_nr_running;
                        /*
                         * When prefer_sibling is set, spread running tasks
                         * evenly across groups.
                         */
                        env->migration_type = migrate_task;
-                       lsub_positive(&nr_diff, local->sum_nr_running);
-                       env->imbalance = nr_diff;
+                       env->imbalance = sibling_imbalance(env, sds, busiest, local);
                } else {
 
                        /*
@@ -10501,20 +10646,27 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
         * group's child domain.
         */
        if (sds.prefer_sibling && local->group_type == group_has_spare &&
-           busiest->sum_nr_running > local->sum_nr_running + 1)
+           sibling_imbalance(env, &sds, busiest, local) > 1)
                goto force_balance;
 
        if (busiest->group_type != group_overloaded) {
-               if (env->idle == CPU_NOT_IDLE)
+               if (env->idle == CPU_NOT_IDLE) {
                        /*
                         * If the busiest group is not overloaded (and as a
                         * result the local one too) but this CPU is already
                         * busy, let another idle CPU try to pull task.
                         */
                        goto out_balanced;
+               }
+
+               if (busiest->group_type == group_smt_balance &&
+                   smt_vs_nonsmt_groups(sds.local, sds.busiest)) {
+                       /* Let a non-SMT CPU pull from an SMT CPU sharing with a sibling */
+                       goto force_balance;
+               }
 
                if (busiest->group_weight > 1 &&
-                   local->idle_cpus <= (busiest->idle_cpus + 1))
+                   local->idle_cpus <= (busiest->idle_cpus + 1)) {
                        /*
                         * If the busiest group is not overloaded
                         * and there is no imbalance between this and busiest
@@ -10525,12 +10677,14 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
                         * there is more than 1 CPU per group.
                         */
                        goto out_balanced;
+               }
 
-               if (busiest->sum_h_nr_running == 1)
+               if (busiest->sum_h_nr_running == 1) {
                        /*
                         * busiest doesn't have any tasks waiting to run
                         */
                        goto out_balanced;
+               }
        }
 
 force_balance:
@@ -10764,7 +10918,7 @@ static int active_load_balance_cpu_stop(void *data);
 static int should_we_balance(struct lb_env *env)
 {
        struct sched_group *sg = env->sd->groups;
-       int cpu;
+       int cpu, idle_smt = -1;
 
        /*
         * Ensure the balancing environment is consistent; can happen
@@ -10791,10 +10945,24 @@ static int should_we_balance(struct lb_env *env)
                if (!idle_cpu(cpu))
                        continue;
 
+               /*
+                * Don't balance to an idle SMT CPU in a busy core right away
+                * when balancing cores; remember the first idle SMT CPU for
+                * later consideration.  Find a CPU on an idle core first.
+                */
+               if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) {
+                       if (idle_smt == -1)
+                               idle_smt = cpu;
+                       continue;
+               }
+
                /* Are we the first idle CPU? */
                return cpu == env->dst_cpu;
        }
 
+       if (idle_smt == env->dst_cpu)
+               return true;
+
        /* Are we the first CPU of this group ? */
        return group_balance_cpu(sg) == env->dst_cpu;
 }
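
The scan above prefers a destination whose whole core is idle and only falls back to the first idle SMT sibling found in a busy core. A toy user-space sketch of the same two-pass selection (the cpu_idle/core_idle arrays are hypothetical stand-ins for idle_cpu() and is_core_idle()):

    #include <stdbool.h>
    #include <stdio.h>

    /* Prefer the first idle CPU whose whole core is idle; otherwise fall
     * back to the first idle SMT sibling in a busy core. Not kernel code. */
    static int pick_balancer(const bool *cpu_idle, const bool *core_idle, int ncpus)
    {
        int cpu, idle_smt = -1;

        for (cpu = 0; cpu < ncpus; cpu++) {
            if (!cpu_idle[cpu])
                continue;
            if (!core_idle[cpu]) {        /* busy core: remember, keep looking */
                if (idle_smt == -1)
                    idle_smt = cpu;
                continue;
            }
            return cpu;                   /* first idle CPU on an idle core */
        }
        return idle_smt;                  /* may be -1: no idle CPU at all */
    }

    int main(void)
    {
        bool cpu_idle[4]  = { false, true, true, true };
        bool core_idle[4] = { false, false, true, true }; /* CPU1 shares a busy core */

        printf("%d\n", pick_balancer(cpu_idle, core_idle, 4)); /* prints 2 */
        return 0;
    }
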
@@ -12007,8 +12175,8 @@ static void rq_offline_fair(struct rq *rq)
 static inline bool
 __entity_slice_used(struct sched_entity *se, int min_nr_tasks)
 {
-       u64 slice = sched_slice(cfs_rq_of(se), se);
        u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime;
+       u64 slice = se->slice;
 
        return (rtime * min_nr_tasks > slice);
 }
@@ -12164,8 +12332,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
  */
 static void task_fork_fair(struct task_struct *p)
 {
-       struct cfs_rq *cfs_rq;
        struct sched_entity *se = &p->se, *curr;
+       struct cfs_rq *cfs_rq;
        struct rq *rq = this_rq();
        struct rq_flags rf;
 
@@ -12174,22 +12342,9 @@ static void task_fork_fair(struct task_struct *p)
 
        cfs_rq = task_cfs_rq(current);
        curr = cfs_rq->curr;
-       if (curr) {
+       if (curr)
                update_curr(cfs_rq);
-               se->vruntime = curr->vruntime;
-       }
-       place_entity(cfs_rq, se, 1);
-
-       if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
-               /*
-                * Upon rescheduling, sched_class::put_prev_task() will place
-                * 'current' within the tree based on its new key value.
-                */
-               swap(curr->vruntime, se->vruntime);
-               resched_curr(rq);
-       }
-
-       se->vruntime -= cfs_rq->min_vruntime;
+       place_entity(cfs_rq, se, ENQUEUE_INITIAL);
        rq_unlock(rq, &rf);
 }
 
@@ -12218,34 +12373,6 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
                check_preempt_curr(rq, p, 0);
 }
 
-static inline bool vruntime_normalized(struct task_struct *p)
-{
-       struct sched_entity *se = &p->se;
-
-       /*
-        * In both the TASK_ON_RQ_QUEUED and TASK_ON_RQ_MIGRATING cases,
-        * the dequeue_entity(.flags=0) will already have normalized the
-        * vruntime.
-        */
-       if (p->on_rq)
-               return true;
-
-       /*
-        * When !on_rq, vruntime of the task has usually NOT been normalized.
-        * But there are some cases where it has already been normalized:
-        *
-        * - A forked child which is waiting for being woken up by
-        *   wake_up_new_task().
-        * - A task which has been woken up by try_to_wake_up() and
-        *   waiting for actually being woken up by sched_ttwu_pending().
-        */
-       if (!se->sum_exec_runtime ||
-           (READ_ONCE(p->__state) == TASK_WAKING && p->sched_remote_wakeup))
-               return true;
-
-       return false;
-}
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * Propagate the changes of the sched_entity across the tg tree to make it
@@ -12316,16 +12443,6 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
 static void detach_task_cfs_rq(struct task_struct *p)
 {
        struct sched_entity *se = &p->se;
-       struct cfs_rq *cfs_rq = cfs_rq_of(se);
-
-       if (!vruntime_normalized(p)) {
-               /*
-                * Fix up our vruntime so that the current sleep doesn't
-                * cause 'unlimited' sleep bonus.
-                */
-               place_entity(cfs_rq, se, 0);
-               se->vruntime -= cfs_rq->min_vruntime;
-       }
 
        detach_entity_cfs_rq(se);
 }
@@ -12333,12 +12450,8 @@ static void detach_task_cfs_rq(struct task_struct *p)
 static void attach_task_cfs_rq(struct task_struct *p)
 {
        struct sched_entity *se = &p->se;
-       struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
        attach_entity_cfs_rq(se);
-
-       if (!vruntime_normalized(p))
-               se->vruntime += cfs_rq->min_vruntime;
 }
 
 static void switched_from_fair(struct rq *rq, struct task_struct *p)
@@ -12450,7 +12563,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
        tg->shares = NICE_0_LOAD;
 
-       init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+       init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));
 
        for_each_possible_cpu(i) {
                cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
@@ -12703,7 +12816,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
         * idle runqueue:
         */
        if (rq->cfs.load.weight)
-               rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
+               rr_interval = NS_TO_JIFFIES(se->slice);
 
        return rr_interval;
 }
index ee7f23c..f770168 100644 (file)
@@ -1,16 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Only give sleepers 50% of their service deficit. This allows
- * them to run sooner, but does not allow tons of sleepers to
- * rip the spread apart.
- */
-SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
 
 /*
- * Place new tasks ahead so that they do not starve already running
- * tasks
+ * Using the avg_vruntime, do the right thing and preserve lag across
+ * sleep+wake cycles. EEVDF placement strategy #1; strategy #2 if disabled.
  */
-SCHED_FEAT(START_DEBIT, true)
+SCHED_FEAT(PLACE_LAG, true)
+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+SCHED_FEAT(RUN_TO_PARITY, true)
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
@@ -20,13 +16,6 @@ SCHED_FEAT(START_DEBIT, true)
 SCHED_FEAT(NEXT_BUDDY, false)
 
 /*
- * Prefer to schedule the task that ran last (when we did
- * wake-preempt) as that likely will touch the same data, increases
- * cache locality.
- */
-SCHED_FEAT(LAST_BUDDY, true)
-
-/*
  * Consider buddies to be cache hot, decreases the likeliness of a
  * cache buddy being migrated away, increases cache locality.
  */
@@ -99,5 +88,4 @@ SCHED_FEAT(UTIL_EST_FASTUP, true)
 
 SCHED_FEAT(LATENCY_WARN, false)
 
-SCHED_FEAT(ALT_PERIOD, true)
-SCHED_FEAT(BASE_SLICE, true)
+SCHED_FEAT(HZ_BW, true)
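
With CONFIG_SCHED_DEBUG these feature bits can be flipped at runtime through debugfs. A minimal sketch, assuming debugfs is mounted at /sys/kernel/debug; writing NO_<name> clears a feature:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        /* Disable PLACE_LAG; placement falls back to strategy #2. */
        const char *feat = "NO_PLACE_LAG";
        int fd = open("/sys/kernel/debug/sched/features", O_WRONLY);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (write(fd, feat, strlen(feat)) < 0)
            perror("write");
        close(fd);
        return 0;
    }
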
index 81fca77..1d0f634 100644 (file)
 static int psi_bug __read_mostly;
 
 DEFINE_STATIC_KEY_FALSE(psi_disabled);
-DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
+static DEFINE_STATIC_KEY_TRUE(psi_cgroups_enabled);
 
 #ifdef CONFIG_PSI_DEFAULT_DISABLED
 static bool psi_enable;
@@ -493,8 +493,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
                        continue;
 
                /* Generate an event */
-               if (cmpxchg(&t->event, 0, 1) == 0)
-                       wake_up_interruptible(&t->event_wait);
+               if (cmpxchg(&t->event, 0, 1) == 0) {
+                       if (t->of)
+                               kernfs_notify(t->of->kn);
+                       else
+                               wake_up_interruptible(&t->event_wait);
+               }
                t->last_event_time = now;
                /* Reset threshold breach flag once event got generated */
                t->pending_event = false;
@@ -1271,8 +1275,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
        return 0;
 }
 
-struct psi_trigger *psi_trigger_create(struct psi_group *group,
-                       char *buf, enum psi_res res, struct file *file)
+struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
+                                      enum psi_res res, struct file *file,
+                                      struct kernfs_open_file *of)
 {
        struct psi_trigger *t;
        enum psi_states state;
@@ -1331,7 +1336,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 
        t->event = 0;
        t->last_event_time = 0;
-       init_waitqueue_head(&t->event_wait);
+       t->of = of;
+       if (!of)
+               init_waitqueue_head(&t->event_wait);
        t->pending_event = false;
        t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
 
@@ -1388,7 +1395,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
         * being accessed later. Can happen if cgroup is deleted from under a
         * polling process.
         */
-       wake_up_pollfree(&t->event_wait);
+       if (t->of)
+               kernfs_notify(t->of->kn);
+       else
+               wake_up_interruptible(&t->event_wait);
 
        if (t->aggregator == PSI_AVGS) {
                mutex_lock(&group->avgs_lock);
@@ -1465,7 +1475,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
        if (!t)
                return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
 
-       poll_wait(file, &t->event_wait, wait);
+       if (t->of)
+               kernfs_generic_poll(t->of, wait);
+       else
+               poll_wait(file, &t->event_wait, wait);
 
        if (cmpxchg(&t->event, 1, 0) == 1)
                ret |= EPOLLPRI;
@@ -1535,7 +1548,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
                return -EBUSY;
        }
 
-       new = psi_trigger_create(&psi_system, buf, res, file);
+       new = psi_trigger_create(&psi_system, buf, res, file, NULL);
        if (IS_ERR(new)) {
                mutex_unlock(&seq->lock);
                return PTR_ERR(new);
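
The trigger machinery above is driven from user space through the PSI pressure files. A minimal sketch following Documentation/accounting/psi.rst (the threshold and window are example values; the /proc/pressure files keep the waitqueue path, while cgroup pressure files now notify via kernfs):

    #include <fcntl.h>
    #include <poll.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        /* Wake up when tasks stall on memory >150ms inside any 1s window. */
        const char *trig = "some 150000 1000000";
        struct pollfd fds;
        int fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);

        if (fd < 0 || write(fd, trig, strlen(trig) + 1) < 0) {
            perror("psi trigger");
            return 1;
        }
        fds.fd = fd;
        fds.events = POLLPRI;
        while (poll(&fds, 1, -1) > 0) {
            if (fds.revents & POLLERR)
                break;                      /* monitored object went away */
            if (fds.revents & POLLPRI)
                printf("memory pressure event\n");
        }
        close(fd);
        return 0;
    }
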
index 00e0e50..0597ba0 100644 (file)
@@ -25,7 +25,7 @@ unsigned int sysctl_sched_rt_period = 1000000;
 int sysctl_sched_rt_runtime = 950000;
 
 #ifdef CONFIG_SYSCTL
-static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
+static int sysctl_sched_rr_timeslice = (MSEC_PER_SEC * RR_TIMESLICE) / HZ;
 static int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
                size_t *lenp, loff_t *ppos);
 static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
@@ -3062,6 +3062,9 @@ static int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
                sched_rr_timeslice =
                        sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
                        msecs_to_jiffies(sysctl_sched_rr_timeslice);
+
+               if (sysctl_sched_rr_timeslice <= 0)
+                       sysctl_sched_rr_timeslice = jiffies_to_msecs(RR_TIMESLICE);
        }
        mutex_unlock(&mutex);
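
The reordered expression above differs only through integer truncation. A quick user-space check with a hypothetical HZ=300 build, where RR_TIMESLICE is HZ/10 jiffies (100 ms):

    #include <stdio.h>

    int main(void)
    {
        const int MSEC_PER_SEC = 1000;
        const int HZ = 300;               /* hypothetical build value */
        const int RR_TIMESLICE = HZ / 10; /* 30 jiffies == 100 ms */

        printf("old: %d ms\n", (MSEC_PER_SEC / HZ) * RR_TIMESLICE); /* 90: truncates */
        printf("new: %d ms\n", (MSEC_PER_SEC * RR_TIMESLICE) / HZ); /* 100 */
        return 0;
    }
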
 
index e93e006..0484627 100644 (file)
@@ -454,11 +454,12 @@ extern void unregister_fair_sched_group(struct task_group *tg);
 extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
                        struct sched_entity *se, int cpu,
                        struct sched_entity *parent);
-extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
+extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b, struct cfs_bandwidth *parent);
 
 extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
 extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
 extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
+extern bool cfs_task_bw_constrained(struct task_struct *p);
 
 extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
                struct sched_rt_entity *rt_se, int cpu,
@@ -494,6 +495,7 @@ static inline void set_task_rq_fair(struct sched_entity *se,
 #else /* CONFIG_CGROUP_SCHED */
 
 struct cfs_bandwidth { };
+static inline bool cfs_task_bw_constrained(struct task_struct *p) { return false; }
 
 #endif /* CONFIG_CGROUP_SCHED */
 
@@ -548,6 +550,9 @@ struct cfs_rq {
        unsigned int            idle_nr_running;   /* SCHED_IDLE */
        unsigned int            idle_h_nr_running; /* SCHED_IDLE */
 
+       s64                     avg_vruntime;
+       u64                     avg_load;
+
        u64                     exec_clock;
        u64                     min_vruntime;
 #ifdef CONFIG_SCHED_CORE
@@ -567,8 +572,6 @@ struct cfs_rq {
         */
        struct sched_entity     *curr;
        struct sched_entity     *next;
-       struct sched_entity     *last;
-       struct sched_entity     *skip;
 
 #ifdef CONFIG_SCHED_DEBUG
        unsigned int            nr_spread_over;
@@ -636,6 +639,8 @@ struct cfs_rq {
        u64                     throttled_clock;
        u64                     throttled_clock_pelt;
        u64                     throttled_clock_pelt_time;
+       u64                     throttled_clock_self;
+       u64                     throttled_clock_self_time;
        int                     throttled;
        int                     throttle_count;
        struct list_head        throttled_list;
@@ -1245,6 +1250,7 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
 
 bool cfs_prio_less(const struct task_struct *a, const struct task_struct *b,
                        bool fi);
+void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi);
 
 /*
  * Helpers to check if the CPU's core cookie matches with the task's cookie
@@ -1700,6 +1706,21 @@ rq_unlock(struct rq *rq, struct rq_flags *rf)
        raw_spin_rq_unlock(rq);
 }
 
+DEFINE_LOCK_GUARD_1(rq_lock, struct rq,
+                   rq_lock(_T->lock, &_T->rf),
+                   rq_unlock(_T->lock, &_T->rf),
+                   struct rq_flags rf)
+
+DEFINE_LOCK_GUARD_1(rq_lock_irq, struct rq,
+                   rq_lock_irq(_T->lock, &_T->rf),
+                   rq_unlock_irq(_T->lock, &_T->rf),
+                   struct rq_flags rf)
+
+DEFINE_LOCK_GUARD_1(rq_lock_irqsave, struct rq,
+                   rq_lock_irqsave(_T->lock, &_T->rf),
+                   rq_unlock_irqrestore(_T->lock, &_T->rf),
+                   struct rq_flags rf)
+
 static inline struct rq *
 this_rq_lock_irq(struct rq_flags *rf)
        __acquires(rq->lock)
@@ -1882,6 +1903,7 @@ struct sched_group {
        atomic_t                ref;
 
        unsigned int            group_weight;
+       unsigned int            cores;
        struct sched_group_capacity *sgc;
        int                     asym_prefer_cpu;        /* CPU of highest priority in group */
        int                     flags;
@@ -2131,12 +2153,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
 }
 
 /* Wake flags. The first three directly map to some SD flag value */
-#define WF_EXEC     0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
-#define WF_FORK     0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
-#define WF_TTWU     0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
+#define WF_EXEC         0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
+#define WF_FORK         0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
+#define WF_TTWU         0x08 /* Wakeup;            maps to SD_BALANCE_WAKE */
 
-#define WF_SYNC     0x10 /* Waker goes to sleep after wakeup */
-#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+#define WF_SYNC         0x10 /* Waker goes to sleep after wakeup */
+#define WF_MIGRATED     0x20 /* Internal use, task got migrated */
+#define WF_CURRENT_CPU  0x40 /* Prefer to move the wakee to the current CPU. */
 
 #ifdef CONFIG_SMP
 static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -2195,6 +2218,7 @@ extern const u32          sched_prio_to_wmult[40];
 #else
 #define ENQUEUE_MIGRATED       0x00
 #endif
+#define ENQUEUE_INITIAL                0x80
 
 #define RETRY_TASK             ((void *)-1UL)
 
@@ -2398,6 +2422,7 @@ static inline struct cpuidle_state *idle_get_state(struct rq *rq)
 #endif
 
 extern void schedule_idle(void);
+asmlinkage void schedule_user(void);
 
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
@@ -2499,11 +2524,9 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+extern unsigned int sysctl_sched_base_slice;
+
 #ifdef CONFIG_SCHED_DEBUG
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_idle_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
 extern int sysctl_resched_latency_warn_ms;
 extern int sysctl_resched_latency_warn_once;
 
@@ -2609,6 +2632,12 @@ static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2)
 static inline void double_rq_clock_clear_update(struct rq *rq1, struct rq *rq2) {}
 #endif
 
+#define DEFINE_LOCK_GUARD_2(name, type, _lock, _unlock, ...)           \
+__DEFINE_UNLOCK_GUARD(name, type, _unlock, type *lock2; __VA_ARGS__) \
+static inline class_##name##_t class_##name##_constructor(type *lock, type *lock2) \
+{ class_##name##_t _t = { .lock = lock, .lock2 = lock2 }, *_T = &_t;   \
+  _lock; return _t; }
+
 #ifdef CONFIG_SMP
 
 static inline bool rq_order_less(struct rq *rq1, struct rq *rq2)
@@ -2738,6 +2767,16 @@ static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
        raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
 }
 
+static inline void double_raw_unlock(raw_spinlock_t *l1, raw_spinlock_t *l2)
+{
+       raw_spin_unlock(l1);
+       raw_spin_unlock(l2);
+}
+
+DEFINE_LOCK_GUARD_2(double_raw_spinlock, raw_spinlock_t,
+                   double_raw_lock(_T->lock, _T->lock2),
+                   double_raw_unlock(_T->lock, _T->lock2))
+
 /*
  * double_rq_unlock - safely unlock two runqueues
  *
@@ -2795,6 +2834,10 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 
 #endif
 
+DEFINE_LOCK_GUARD_2(double_rq_lock, struct rq,
+                   double_rq_lock(_T->lock, _T->lock2),
+                   double_rq_unlock(_T->lock, _T->lock2))
+
 extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
 extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
 
@@ -3229,6 +3272,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
 extern void swake_up_all_locked(struct swait_queue_head *q);
 extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
 
+extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
+
 #ifdef CONFIG_PREEMPT_DYNAMIC
 extern int preempt_dynamic_mode;
 extern int sched_dynamic_mode(const char *str);
@@ -3480,4 +3525,7 @@ static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
 static inline void init_sched_mm_cid(struct task_struct *t) { }
 #endif
 
+extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
+extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
 #endif /* _KERNEL_SCHED_SCHED_H */
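
The DEFINE_LOCK_GUARD_1/2 instances above hook the rq locks into the <linux/cleanup.h> guard infrastructure. A hypothetical kernel-side illustration of how a caller could use the double_rq_lock guard (a sketch, not code from this series):

    /* Hypothetical sketch; assumes <linux/cleanup.h> guard() semantics. */
    static void sketch_pair_update(struct rq *rq1, struct rq *rq2)
    {
        guard(double_rq_lock)(rq1, rq2);    /* takes both rq locks, in order */

        /* ... operate on both runqueues ... */

    }   /* double_rq_unlock(rq1, rq2) runs automatically on scope exit */
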
index 76b9b79..72505cd 100644 (file)
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(__init_swait_queue_head);
  * If for some reason it would return 0, that means the previously waiting
  * task is already running, so it will observe condition true (or has already).
  */
-void swake_up_locked(struct swait_queue_head *q)
+void swake_up_locked(struct swait_queue_head *q, int wake_flags)
 {
        struct swait_queue *curr;
 
@@ -26,7 +26,7 @@ void swake_up_locked(struct swait_queue_head *q)
                return;
 
        curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
-       wake_up_process(curr->task);
+       try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
        list_del_init(&curr->task_list);
 }
 EXPORT_SYMBOL(swake_up_locked);
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(swake_up_locked);
 void swake_up_all_locked(struct swait_queue_head *q)
 {
        while (!list_empty(&q->task_list))
-               swake_up_locked(q);
+               swake_up_locked(q, 0);
 }
 
 void swake_up_one(struct swait_queue_head *q)
@@ -49,7 +49,7 @@ void swake_up_one(struct swait_queue_head *q)
        unsigned long flags;
 
        raw_spin_lock_irqsave(&q->lock, flags);
-       swake_up_locked(q);
+       swake_up_locked(q, 0);
        raw_spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(swake_up_one);
index d3a3b26..05a5bc6 100644 (file)
@@ -722,8 +722,7 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 
                        if (parent->parent) {
                                parent->parent->child = tmp;
-                               if (tmp->flags & SD_SHARE_CPUCAPACITY)
-                                       parent->parent->groups->flags |= SD_SHARE_CPUCAPACITY;
+                               parent->parent->groups->flags = tmp->flags;
                        }
 
                        /*
@@ -1275,14 +1274,24 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
 {
        struct sched_group *sg = sd->groups;
+       struct cpumask *mask = sched_domains_tmpmask2;
 
        WARN_ON(!sg);
 
        do {
-               int cpu, max_cpu = -1;
+               int cpu, cores = 0, max_cpu = -1;
 
                sg->group_weight = cpumask_weight(sched_group_span(sg));
 
+               cpumask_copy(mask, sched_group_span(sg));
+               for_each_cpu(cpu, mask) {
+                       cores++;
+#ifdef CONFIG_SCHED_SMT
+                       cpumask_andnot(mask, mask, cpu_smt_mask(cpu));
+#endif
+               }
+               sg->cores = cores;
+
                if (!(sd->flags & SD_ASYM_PACKING))
                        goto next;
 
index 48c53e4..802d98c 100644 (file)
@@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
 }
 EXPORT_SYMBOL(__wake_up);
 
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
+{
+       __wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
+}
+
 /*
  * Same as __wake_up but called with the spinlock in wait_queue_head_t held.
  */
index d3e5840..255999b 100644 (file)
@@ -110,11 +110,13 @@ struct seccomp_knotif {
  * @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
  *         is allowed.
  * @ioctl_flags: The flags used for the seccomp_addfd ioctl.
+ * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
  * @ret: The return value of the installing process. It is set to the fd num
  *       upon success (>= 0).
  * @completion: Indicates that the installing process has completed fd
  *              installation, or gone away (either due to successful
  *              reply, or signal)
+ * @list: list_head for chaining seccomp_kaddfd together.
  *
  */
 struct seccomp_kaddfd {
@@ -138,14 +140,17 @@ struct seccomp_kaddfd {
  * structure is fairly large, we store the notification-specific stuff in a
  * separate structure.
  *
- * @request: A semaphore that users of this notification can wait on for
- *           changes. Actual reads and writes are still controlled with
- *           filter->notify_lock.
+ * @requests: An atomic count of pending requests that users of this
+ *            notification can wait on for changes. Actual reads and writes
+ *            are still controlled with filter->notify_lock.
+ * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
  * @next_id: The id of the next request.
  * @notifications: A list of struct seccomp_knotif elements.
  */
+
 struct notification {
-       struct semaphore request;
+       atomic_t requests;
+       u32 flags;
        u64 next_id;
        struct list_head notifications;
 };
@@ -555,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
  *                         drop its reference count, and notify
  *                         about unused filters
  *
+ * @tsk: task the filter should be released from.
+ *
  * This function should only be called when the task is exiting as
  * it detaches it from its filter tree. As such, the READ_ONCE() and
  * barriers that would normally be needed are not required here.
@@ -574,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk)
 /**
  * seccomp_sync_threads: sets all threads to use current's filter
  *
+ * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
+ *
  * Expects sighand and cred_guard_mutex locks to be held, and for
  * seccomp_can_sync_threads() to have returned success already
  * without dropping the locks.
@@ -1116,8 +1125,11 @@ static int seccomp_do_user_notification(int this_syscall,
        list_add_tail(&n.list, &match->notif->notifications);
        INIT_LIST_HEAD(&n.addfd);
 
-       up(&match->notif->request);
-       wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
+       atomic_inc(&match->notif->requests);
+       if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+               wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
+       else
+               wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
 
        /*
         * This is where we wait for a reply from userspace.
@@ -1450,6 +1462,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
        return NULL;
 }
 
+static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
+                                 void *key)
+{
+       /* Avoid a wakeup if the event is not interesting to us. */
+       if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
+               return 0;
+       return autoremove_wake_function(wait, mode, sync, key);
+}
+
+static int recv_wait_event(struct seccomp_filter *filter)
+{
+       DEFINE_WAIT_FUNC(wait, recv_wake_function);
+       int ret;
+
+       if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+               return 0;
+
+       for (;;) {
+               ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
+
+               if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+                       break;
+
+               if (ret)
+                       return ret;
+
+               schedule();
+       }
+       finish_wait(&filter->wqh, &wait);
+       return 0;
+}
 
 static long seccomp_notify_recv(struct seccomp_filter *filter,
                                void __user *buf)
@@ -1467,7 +1510,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
 
        memset(&unotif, 0, sizeof(unotif));
 
-       ret = down_interruptible(&filter->notif->request);
+       ret = recv_wait_event(filter);
        if (ret < 0)
                return ret;
 
@@ -1515,7 +1558,8 @@ out:
                        if (should_sleep_killable(filter, knotif))
                                complete(&knotif->ready);
                        knotif->state = SECCOMP_NOTIFY_INIT;
-                       up(&filter->notif->request);
+                       atomic_inc(&filter->notif->requests);
+                       wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
                }
                mutex_unlock(&filter->notify_lock);
        }
@@ -1561,7 +1605,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
        knotif->error = resp.error;
        knotif->val = resp.val;
        knotif->flags = resp.flags;
-       complete(&knotif->ready);
+       if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+               complete_on_current_cpu(&knotif->ready);
+       else
+               complete(&knotif->ready);
 out:
        mutex_unlock(&filter->notify_lock);
        return ret;
@@ -1591,6 +1638,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
        return ret;
 }
 
+static long seccomp_notify_set_flags(struct seccomp_filter *filter,
+                                   unsigned long flags)
+{
+       long ret;
+
+       if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+               return -EINVAL;
+
+       ret = mutex_lock_interruptible(&filter->notify_lock);
+       if (ret < 0)
+               return ret;
+       filter->notif->flags = flags;
+       mutex_unlock(&filter->notify_lock);
+       return 0;
+}
+
 static long seccomp_notify_addfd(struct seccomp_filter *filter,
                                 struct seccomp_notif_addfd __user *uaddfd,
                                 unsigned int size)
@@ -1720,6 +1783,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
        case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
        case SECCOMP_IOCTL_NOTIF_ID_VALID:
                return seccomp_notify_id_valid(filter, buf);
+       case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
+               return seccomp_notify_set_flags(filter, arg);
        }
 
        /* Extensible Argument ioctls */
@@ -1777,7 +1842,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
        if (!filter->notif)
                goto out;
 
-       sema_init(&filter->notif->request, 0);
        filter->notif->next_id = get_random_u64();
        INIT_LIST_HEAD(&filter->notif->notifications);
 
index b5370fe..128e9bb 100644 (file)
@@ -562,6 +562,10 @@ bool unhandled_signal(struct task_struct *tsk, int sig)
        if (handler != SIG_IGN && handler != SIG_DFL)
                return false;
 
+       /* If dying, we handle all new signals by ignoring them */
+       if (fatal_signal_pending(tsk))
+               return false;
+
        /* if ptraced, let the tracer determine */
        return !tsk->ptrace;
 }
index 385179d..8455a53 100644 (file)
@@ -46,6 +46,8 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
+static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1);
+
 static void __flush_smp_call_function_queue(bool warn_cpu_offline);
 
 int smpcfd_prepare_cpu(unsigned int cpu)
@@ -253,13 +255,15 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
                         *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
        }
        if (cpu >= 0) {
-               dump_cpu_task(cpu);
+               if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0))
+                       dump_cpu_task(cpu);
                if (!cpu_cur_csd) {
                        pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
                        arch_send_call_function_single_ipi(cpu);
                }
        }
-       dump_stack();
+       if (firsttime)
+               dump_stack();
        *ts1 = ts2;
 
        return false;
@@ -433,9 +437,14 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline)
        struct llist_node *entry, *prev;
        struct llist_head *head;
        static bool warned;
+       atomic_t *tbt;
 
        lockdep_assert_irqs_disabled();
 
+       /* Allow waiters to send backtrace NMI from here onwards */
+       tbt = this_cpu_ptr(&trigger_backtrace);
+       atomic_set_release(tbt, 1);
+
        head = this_cpu_ptr(&call_single_queue);
        entry = llist_del_all(head);
        entry = llist_reverse_order(entry);
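
The trigger_backtrace gate above is a one-shot cmpxchg pattern: the first waiter to flip it 1->0 dumps the stuck CPU's stack, and the target CPU re-arms it when it finally flushes its call_single_queue. A user-space analogue using C11 atomics:

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int trigger = 1;       /* armed */

    /* Only the first caller after (re)arming gets to act. */
    static int try_trigger(void)
    {
        int expected = 1;
        return atomic_compare_exchange_strong(&trigger, &expected, 0);
    }

    int main(void)
    {
        printf("%d\n", try_trigger());   /* 1: this caller dumps the backtrace */
        printf("%d\n", try_trigger());   /* 0: suppressed until re-armed */
        atomic_store(&trigger, 1);       /* the target CPU re-arms in its flush path */
        printf("%d\n", try_trigger());   /* 1 again */
        return 0;
    }
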
index 807b34c..210cf5f 100644 (file)
@@ -612,7 +612,7 @@ static inline void tick_irq_exit(void)
        int cpu = smp_processor_id();
 
        /* Make sure that timer wheel updates are propagated */
-       if ((idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
+       if ((sched_core_idle_cpu(cpu) && !need_resched()) || tick_nohz_full_cpu(cpu)) {
                if (!in_hardirq())
                        tick_nohz_irq_exit();
        }
index 05f8389..2410e39 100644 (file)
@@ -2535,11 +2535,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                        else
                                return -EINVAL;
                        break;
-       case PR_GET_AUXV:
-               if (arg4 || arg5)
-                       return -EINVAL;
-               error = prctl_get_auxv((void __user *)arg2, arg3);
-               break;
                default:
                        return -EINVAL;
                }
@@ -2694,6 +2689,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
        case PR_SET_VMA:
                error = prctl_set_vma(arg2, arg3, arg4, arg5);
                break;
+       case PR_GET_AUXV:
+               if (arg4 || arg5)
+                       return -EINVAL;
+               error = prctl_get_auxv((void __user *)arg2, arg3);
+               break;
 #ifdef CONFIG_KSM
        case PR_SET_MEMORY_MERGE:
                if (arg3 || arg4 || arg5)
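
With the case moved out of the inner switch, PR_GET_AUXV is dispatched again. A minimal user-space sketch (PR_GET_AUXV is defined locally in case older headers lack it; the call returns the size of the kernel's saved auxv, which can exceed the buffer, hence the clamp):

    #include <elf.h>
    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_GET_AUXV
    #define PR_GET_AUXV 0x41555856          /* "AUXV"; absent from old headers */
    #endif

    int main(void)
    {
        Elf64_auxv_t auxv[64] = { 0 };
        long n = prctl(PR_GET_AUXV, auxv, sizeof(auxv), 0, 0);

        if (n < 0) {
            perror("PR_GET_AUXV");
            return 1;
        }
        if (n > (long)sizeof(auxv))         /* kernel reports its full copy size */
            n = sizeof(auxv);
        for (long i = 0; i < n / (long)sizeof(auxv[0]); i++)
            if (auxv[i].a_type == AT_PAGESZ)
                printf("page size: %llu\n",
                       (unsigned long long)auxv[i].a_un.a_val);
        return 0;
    }
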
index 88cbc11..c108ed8 100644 (file)
@@ -473,8 +473,8 @@ static void clocksource_watchdog(struct timer_list *unused)
                /* Check the deviation from the watchdog clocksource. */
                md = cs->uncertainty_margin + watchdog->uncertainty_margin;
                if (abs(cs_nsec - wd_nsec) > md) {
-                       u64 cs_wd_msec;
-                       u64 wd_msec;
+                       s64 cs_wd_msec;
+                       s64 wd_msec;
                        u32 wd_rem;
 
                        pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
@@ -483,8 +483,8 @@ static void clocksource_watchdog(struct timer_list *unused)
                                watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
                        pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
                                cs->name, cs_nsec, csnow, cslast, cs->mask);
-                       cs_wd_msec = div_u64_rem(cs_nsec - wd_nsec, 1000U * 1000U, &wd_rem);
-                       wd_msec = div_u64_rem(wd_nsec, 1000U * 1000U, &wd_rem);
+                       cs_wd_msec = div_s64_rem(cs_nsec - wd_nsec, 1000 * 1000, &wd_rem);
+                       wd_msec = div_s64_rem(wd_nsec, 1000 * 1000, &wd_rem);
                        pr_warn("                      Clocksource '%s' skewed %lld ns (%lld ms) over watchdog '%s' interval of %lld ns (%lld ms)\n",
                                cs->name, cs_nsec - wd_nsec, cs_wd_msec, watchdog->name, wd_nsec, wd_msec);
                        if (curr_clocksource == cs)
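
The u64 to s64 change matters when the clocksource runs behind the watchdog, making cs_nsec - wd_nsec negative: unsigned division turned the skew into a huge bogus millisecond count. A user-space illustration of the before and after:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int64_t skew_ns = -1500000;      /* clocksource 1.5 ms behind watchdog */

        /* old: the negative skew wraps to a huge unsigned value first */
        printf("u64: %llu ms\n",
               (unsigned long long)((uint64_t)skew_ns / 1000000u));
        /* new: signed division reports -1 ms as intended */
        printf("s64: %lld ms\n", (long long)(skew_ns / 1000000));
        return 0;
    }
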
index 1a0519b..b28b05b 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/ktime.h>
 #include <asm/byteorder.h>
 #include <linux/torture.h>
+#include <linux/sched/rt.h>
 #include "rcu/rcu.h"
 
 MODULE_LICENSE("GPL");
@@ -54,6 +55,9 @@ module_param(verbose_sleep_frequency, int, 0444);
 static int verbose_sleep_duration = 1;
 module_param(verbose_sleep_duration, int, 0444);
 
+static int random_shuffle;
+module_param(random_shuffle, int, 0444);
+
 static char *torture_type;
 static int verbose;
 
@@ -88,8 +92,8 @@ int torture_hrtimeout_ns(ktime_t baset_ns, u32 fuzzt_ns, struct torture_random_s
        ktime_t hto = baset_ns;
 
        if (trsp)
-               hto += (torture_random(trsp) >> 3) % fuzzt_ns;
-       set_current_state(TASK_UNINTERRUPTIBLE);
+               hto += torture_random(trsp) % fuzzt_ns;
+       set_current_state(TASK_IDLE);
        return schedule_hrtimeout(&hto, HRTIMER_MODE_REL);
 }
 EXPORT_SYMBOL_GPL(torture_hrtimeout_ns);
@@ -350,22 +354,22 @@ torture_onoff(void *arg)
 
        if (onoff_holdoff > 0) {
                VERBOSE_TOROUT_STRING("torture_onoff begin holdoff");
-               schedule_timeout_interruptible(onoff_holdoff);
+               torture_hrtimeout_jiffies(onoff_holdoff, &rand);
                VERBOSE_TOROUT_STRING("torture_onoff end holdoff");
        }
        while (!torture_must_stop()) {
                if (disable_onoff_at_boot && !rcu_inkernel_boot_has_ended()) {
-                       schedule_timeout_interruptible(HZ / 10);
+                       torture_hrtimeout_jiffies(HZ / 10, &rand);
                        continue;
                }
-               cpu = (torture_random(&rand) >> 4) % (maxcpu + 1);
+               cpu = torture_random(&rand) % (maxcpu + 1);
                if (!torture_offline(cpu,
                                     &n_offline_attempts, &n_offline_successes,
                                     &sum_offline, &min_offline, &max_offline))
                        torture_online(cpu,
                                       &n_online_attempts, &n_online_successes,
                                       &sum_online, &min_online, &max_online);
-               schedule_timeout_interruptible(onoff_interval);
+               torture_hrtimeout_jiffies(onoff_interval, &rand);
        }
 
 stop:
@@ -518,6 +522,7 @@ static void torture_shuffle_task_unregister_all(void)
  */
 static void torture_shuffle_tasks(void)
 {
+       DEFINE_TORTURE_RANDOM(rand);
        struct shuffle_task *stp;
 
        cpumask_setall(shuffle_tmp_mask);
@@ -537,8 +542,10 @@ static void torture_shuffle_tasks(void)
                cpumask_clear_cpu(shuffle_idle_cpu, shuffle_tmp_mask);
 
        mutex_lock(&shuffle_task_mutex);
-       list_for_each_entry(stp, &shuffle_task_list, st_l)
-               set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
+       list_for_each_entry(stp, &shuffle_task_list, st_l) {
+               if (!random_shuffle || torture_random(&rand) & 0x1)
+                       set_cpus_allowed_ptr(stp->st_t, shuffle_tmp_mask);
+       }
        mutex_unlock(&shuffle_task_mutex);
 
        cpus_read_unlock();
@@ -550,9 +557,11 @@ static void torture_shuffle_tasks(void)
  */
 static int torture_shuffle(void *arg)
 {
+       DEFINE_TORTURE_RANDOM(rand);
+
        VERBOSE_TOROUT_STRING("torture_shuffle task started");
        do {
-               schedule_timeout_interruptible(shuffle_interval);
+               torture_hrtimeout_jiffies(shuffle_interval, &rand);
                torture_shuffle_tasks();
                torture_shutdown_absorb("torture_shuffle");
        } while (!torture_must_stop());
@@ -728,12 +737,12 @@ bool stutter_wait(const char *title)
        cond_resched_tasks_rcu_qs();
        spt = READ_ONCE(stutter_pause_test);
        for (; spt; spt = READ_ONCE(stutter_pause_test)) {
-               if (!ret) {
+               if (!ret && !rt_task(current)) {
                        sched_set_normal(current, MAX_NICE);
                        ret = true;
                }
                if (spt == 1) {
-                       schedule_timeout_interruptible(1);
+                       torture_hrtimeout_jiffies(1, NULL);
                } else if (spt == 2) {
                        while (READ_ONCE(stutter_pause_test)) {
                                if (!(i++ & 0xffff))
@@ -741,7 +750,7 @@ bool stutter_wait(const char *title)
                                cond_resched();
                        }
                } else {
-                       schedule_timeout_interruptible(round_jiffies_relative(HZ));
+                       torture_hrtimeout_jiffies(round_jiffies_relative(HZ), NULL);
                }
                torture_shutdown_absorb(title);
        }
@@ -926,7 +935,7 @@ EXPORT_SYMBOL_GPL(torture_kthread_stopping);
  * it starts, you will need to open-code your own.
  */
 int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
-                           char *f, struct task_struct **tp)
+                           char *f, struct task_struct **tp, void (*cbf)(struct task_struct *tp))
 {
        int ret = 0;
 
@@ -938,6 +947,10 @@ int _torture_create_kthread(int (*fn)(void *arg), void *arg, char *s, char *m,
                *tp = NULL;
                return ret;
        }
+
+       if (cbf)
+               cbf(*tp);
+
        wake_up_process(*tp);  // Process is sleeping, so ordering provided.
        torture_shuffle_task_register(*tp);
        return ret;
index 5f2dcab..bd1a42b 100644 (file)
@@ -661,8 +661,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level);
 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
           u64, flags, void *, data, u64, size)
 {
-       struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
-       int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+       struct bpf_trace_sample_data *sds;
        struct perf_raw_record raw = {
                .frag = {
                        .size = size,
@@ -670,7 +669,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
                },
        };
        struct perf_sample_data *sd;
-       int err;
+       int nest_level, err;
+
+       preempt_disable();
+       sds = this_cpu_ptr(&bpf_trace_sds);
+       nest_level = this_cpu_inc_return(bpf_trace_nest_level);
 
        if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
                err = -EBUSY;
@@ -688,9 +691,9 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
        perf_sample_save_raw_data(sd, &raw);
 
        err = __bpf_perf_event_output(regs, map, flags, sd);
-
 out:
        this_cpu_dec(bpf_trace_nest_level);
+       preempt_enable();
        return err;
 }
 
@@ -715,7 +718,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
                     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
 {
-       int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
        struct perf_raw_frag frag = {
                .copy           = ctx_copy,
                .size           = ctx_size,
@@ -732,8 +734,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
        };
        struct perf_sample_data *sd;
        struct pt_regs *regs;
+       int nest_level;
        u64 ret;
 
+       preempt_disable();
+       nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+
        if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
                ret = -EBUSY;
                goto out;
@@ -748,6 +754,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
        ret = __bpf_perf_event_output(regs, map, flags, sd);
 out:
        this_cpu_dec(bpf_event_output_nest_level);
+       preempt_enable();
        return ret;
 }
 
index cd2c35b..c83c005 100644 (file)
@@ -15,6 +15,7 @@
 #include <trace/events/sched.h>
 
 #include "ftrace_internal.h"
+#include "trace.h"
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ASSIGN_OPS_HASH(opsname, val) \
index e4704ec..3b21f40 100644 (file)
@@ -100,14 +100,22 @@ static void fprobe_kprobe_handler(unsigned long ip, unsigned long parent_ip,
                return;
        }
 
+       /*
+        * This user handler is shared with other kprobes and is not expected to be
+        * called recursively. So if any other kprobe handler is running, this
+        * exits early, just as a kprobe would. See the section 'Share the
+        * callbacks with kprobes' in Documentation/trace/fprobe.rst for more
+        * information.
+        */
        if (unlikely(kprobe_running())) {
                fp->nmissed++;
-               return;
+               goto recursion_unlock;
        }
 
        kprobe_busy_begin();
        __fprobe_handler(ip, parent_ip, ops, fregs);
        kprobe_busy_end();
+
+recursion_unlock:
        ftrace_test_recursion_unlock(bit);
 }
 
@@ -371,19 +379,16 @@ int unregister_fprobe(struct fprobe *fp)
        if (!fprobe_is_registered(fp))
                return -EINVAL;
 
-       /*
-        * rethook_free() starts disabling the rethook, but the rethook handlers
-        * may be running on other processors at this point. To make sure that all
-        * current running handlers are finished, call unregister_ftrace_function()
-        * after this.
-        */
        if (fp->rethook)
-               rethook_free(fp->rethook);
+               rethook_stop(fp->rethook);
 
        ret = unregister_ftrace_function(&fp->ops);
        if (ret < 0)
                return ret;
 
+       if (fp->rethook)
+               rethook_free(fp->rethook);
+
        ftrace_free_filter(&fp->ops);
 
        return ret;
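
This reordering follows the contract documented on rethook_stop() in the rethook.c hunk below: quiesce the handlers first, tear down the ftrace hook, and only then free. A kernel-side sketch of the resulting sequence (illustrative; error handling elided):

    /* Kernel-side sketch of the teardown ordering; illustrative only. */
    static int sketch_unregister(struct fprobe *fp)
    {
        rethook_stop(fp->rethook);            /* handlers observe NULL, bail out */
        unregister_ftrace_function(&fp->ops); /* waits for in-flight callbacks  */
        rethook_free(fp->rethook);            /* now nothing can re-enter it    */
        return 0;
    }
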
index 3740aca..05c0024 100644 (file)
@@ -3305,6 +3305,22 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count)
        return cnt;
 }
 
+static void ftrace_free_pages(struct ftrace_page *pages)
+{
+       struct ftrace_page *pg = pages;
+
+       while (pg) {
+               if (pg->records) {
+                       free_pages((unsigned long)pg->records, pg->order);
+                       ftrace_number_of_pages -= 1 << pg->order;
+               }
+               pages = pg->next;
+               kfree(pg);
+               pg = pages;
+               ftrace_number_of_groups--;
+       }
+}
+
 static struct ftrace_page *
 ftrace_allocate_pages(unsigned long num_to_init)
 {
@@ -3343,17 +3359,7 @@ ftrace_allocate_pages(unsigned long num_to_init)
        return start_pg;
 
  free_pages:
-       pg = start_pg;
-       while (pg) {
-               if (pg->records) {
-                       free_pages((unsigned long)pg->records, pg->order);
-                       ftrace_number_of_pages -= 1 << pg->order;
-               }
-               start_pg = pg->next;
-               kfree(pg);
-               pg = start_pg;
-               ftrace_number_of_groups--;
-       }
+       ftrace_free_pages(start_pg);
        pr_info("ftrace: FAILED to allocate memory for functions\n");
        return NULL;
 }
@@ -6471,9 +6477,11 @@ static int ftrace_process_locs(struct module *mod,
                               unsigned long *start,
                               unsigned long *end)
 {
+       struct ftrace_page *pg_unuse = NULL;
        struct ftrace_page *start_pg;
        struct ftrace_page *pg;
        struct dyn_ftrace *rec;
+       unsigned long skipped = 0;
        unsigned long count;
        unsigned long *p;
        unsigned long addr;
@@ -6536,8 +6544,10 @@ static int ftrace_process_locs(struct module *mod,
                 * object files to satisfy alignments.
                 * Skip any NULL pointers.
                 */
-               if (!addr)
+               if (!addr) {
+                       skipped++;
                        continue;
+               }
 
                end_offset = (pg->index+1) * sizeof(pg->records[0]);
                if (end_offset > PAGE_SIZE << pg->order) {
@@ -6551,8 +6561,10 @@ static int ftrace_process_locs(struct module *mod,
                rec->ip = addr;
        }
 
-       /* We should have used all pages */
-       WARN_ON(pg->next);
+       if (pg->next) {
+               pg_unuse = pg->next;
+               pg->next = NULL;
+       }
 
        /* Assign the last page to ftrace_pages */
        ftrace_pages = pg;
@@ -6574,6 +6586,11 @@ static int ftrace_process_locs(struct module *mod,
  out:
        mutex_unlock(&ftrace_lock);
 
+       /* We should have used all pages unless we skipped some */
+       if (pg_unuse) {
+               WARN_ON(!skipped);
+               ftrace_free_pages(pg_unuse);
+       }
        return ret;
 }
 
index 382775e..5012c04 100644 (file)
@@ -2,6 +2,9 @@
 #ifndef _LINUX_KERNEL_FTRACE_INTERNAL_H
 #define  _LINUX_KERNEL_FTRACE_INTERNAL_H
 
+int __register_ftrace_function(struct ftrace_ops *ops);
+int __unregister_ftrace_function(struct ftrace_ops *ops);
+
 #ifdef CONFIG_FUNCTION_TRACER
 
 extern struct mutex ftrace_lock;
@@ -15,8 +18,6 @@ int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs);
 
 #else /* !CONFIG_DYNAMIC_FTRACE */
 
-int __register_ftrace_function(struct ftrace_ops *ops);
-int __unregister_ftrace_function(struct ftrace_ops *ops);
 /* Keep as macros so we do not need to define the commands */
 # define ftrace_startup(ops, command)                                  \
        ({                                                              \
index f32ee48..5eb9b59 100644 (file)
@@ -54,6 +54,19 @@ static void rethook_free_rcu(struct rcu_head *head)
 }
 
 /**
+ * rethook_stop() - Stop using a rethook.
+ * @rh: the struct rethook to stop.
+ *
+ * Stop using a rethook to prepare for freeing it. If you want to wait for
+ * all running rethook handlers before calling rethook_free(), you need to
+ * call this first, wait for an RCU grace period, and then call rethook_free().
+ */
+void rethook_stop(struct rethook *rh)
+{
+       WRITE_ONCE(rh->handler, NULL);
+}
+
+/**
  * rethook_free() - Free struct rethook.
  * @rh: the struct rethook to be freed.
  *
index 834b361..52dea5d 100644 (file)
@@ -523,6 +523,8 @@ struct ring_buffer_per_cpu {
        rb_time_t                       before_stamp;
        u64                             event_stamp[MAX_NEST];
        u64                             read_stamp;
+       /* pages removed since last reset */
+       unsigned long                   pages_removed;
        /* ring buffer pages to update, > 0 to add, < 0 to remove */
        long                            nr_pages_to_update;
        struct list_head                new_pages; /* new pages to add */
@@ -536,6 +538,7 @@ struct trace_buffer {
        unsigned                        flags;
        int                             cpus;
        atomic_t                        record_disabled;
+       atomic_t                        resizing;
        cpumask_var_t                   cpumask;
 
        struct lock_class_key           *reader_lock_key;
@@ -558,6 +561,7 @@ struct ring_buffer_iter {
        struct buffer_page              *head_page;
        struct buffer_page              *cache_reader_page;
        unsigned long                   cache_read;
+       unsigned long                   cache_pages_removed;
        u64                             read_stamp;
        u64                             page_stamp;
        struct ring_buffer_event        *event;
@@ -946,6 +950,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
 /**
  * ring_buffer_wake_waiters - wake up any waiters on this ring buffer
  * @buffer: The ring buffer to wake waiters on
+ * @cpu: The CPU buffer to wake waiters on
  *
  * When a file that represents a ring buffer is closing,
  * it is prudent to wake up any waiters that are on this.
@@ -1956,6 +1961,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
                to_remove = rb_list_head(to_remove)->next;
                head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
        }
+       /* Read iterators need to reset themselves when some pages are removed */
+       cpu_buffer->pages_removed += nr_removed;
 
        next_page = rb_list_head(to_remove)->next;
 
@@ -1977,12 +1984,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
                cpu_buffer->head_page = list_entry(next_page,
                                                struct buffer_page, list);
 
-       /*
-        * change read pointer to make sure any read iterators reset
-        * themselves
-        */
-       cpu_buffer->read = 0;
-
        /* pages are removed, resume tracing and then free the pages */
        atomic_dec(&cpu_buffer->record_disabled);
        raw_spin_unlock_irq(&cpu_buffer->reader_lock);
@@ -2167,7 +2168,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 
        /* prevent another thread from changing buffer sizes */
        mutex_lock(&buffer->mutex);
-
+       atomic_inc(&buffer->resizing);
 
        if (cpu_id == RING_BUFFER_ALL_CPUS) {
                /*
@@ -2322,6 +2323,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                atomic_dec(&buffer->record_disabled);
        }
 
+       atomic_dec(&buffer->resizing);
        mutex_unlock(&buffer->mutex);
        return 0;
 
@@ -2342,6 +2344,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
                }
        }
  out_err_unlock:
+       atomic_dec(&buffer->resizing);
        mutex_unlock(&buffer->mutex);
        return err;
 }
@@ -3373,7 +3376,6 @@ void ring_buffer_nest_end(struct trace_buffer *buffer)
 /**
  * ring_buffer_unlock_commit - commit a reserved
  * @buffer: The buffer to commit to
- * @event: The event pointer to commit.
  *
  * This commits the data to the ring buffer, and releases any locks held.
  *
@@ -4392,6 +4394,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
 
        iter->cache_reader_page = iter->head_page;
        iter->cache_read = cpu_buffer->read;
+       iter->cache_pages_removed = cpu_buffer->pages_removed;
 
        if (iter->head) {
                iter->read_stamp = cpu_buffer->read_stamp;
@@ -4846,12 +4849,13 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        buffer = cpu_buffer->buffer;
 
        /*
-        * Check if someone performed a consuming read to
-        * the buffer. A consuming read invalidates the iterator
-        * and we need to reset the iterator in this case.
+        * Check if someone performed a consuming read to the buffer
+        * or removed some pages from the buffer. In these cases,
+        * the iterator was invalidated and we need to reset it.
         */
        if (unlikely(iter->cache_read != cpu_buffer->read ||
-                    iter->cache_reader_page != cpu_buffer->reader_page))
+                    iter->cache_reader_page != cpu_buffer->reader_page ||
+                    iter->cache_pages_removed != cpu_buffer->pages_removed))
                rb_iter_reset(iter);
 
  again:
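
The hunk above makes the iterator invalidation check three-way: a cached read count, a cached reader page, and now a cached pages_removed count. Below is a minimal userspace sketch of that generation-check pattern; the structs and names are illustrative, not the kernel's.

    #include <stdio.h>

    struct buf {
            unsigned long read;            /* consuming reads so far */
            unsigned long pages_removed;   /* pages dropped by resizes */
    };

    struct iter {
            unsigned long cache_read;
            unsigned long cache_pages_removed;
            int pos;
    };

    static void iter_reset(struct iter *it, const struct buf *b)
    {
            it->cache_read = b->read;
            it->cache_pages_removed = b->pages_removed;
            it->pos = 0;
    }

    static void iter_peek(struct iter *it, const struct buf *b)
    {
            /* Any mismatch means the snapshot the iterator was built on
             * is gone, so start over, as rb_iter_peek() now does. */
            if (it->cache_read != b->read ||
                it->cache_pages_removed != b->pages_removed) {
                    printf("stale iterator, resetting\n");
                    iter_reset(it, b);
            }
            printf("peek at pos %d\n", it->pos++);
    }

    int main(void)
    {
            struct buf b = { 0, 0 };
            struct iter it;

            iter_reset(&it, &b);
            iter_peek(&it, &b);     /* valid */
            b.pages_removed++;      /* a resize removed pages */
            iter_peek(&it, &b);     /* detected, iterator resets */
            return 0;
    }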
@@ -5242,28 +5246,34 @@ unsigned long ring_buffer_size(struct trace_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_size);
 
+static void rb_clear_buffer_page(struct buffer_page *page)
+{
+       local_set(&page->write, 0);
+       local_set(&page->entries, 0);
+       rb_init_page(page->page);
+       page->read = 0;
+}
+
 static void
 rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 {
+       struct buffer_page *page;
+
        rb_head_page_deactivate(cpu_buffer);
 
        cpu_buffer->head_page
                = list_entry(cpu_buffer->pages, struct buffer_page, list);
-       local_set(&cpu_buffer->head_page->write, 0);
-       local_set(&cpu_buffer->head_page->entries, 0);
-       local_set(&cpu_buffer->head_page->page->commit, 0);
-
-       cpu_buffer->head_page->read = 0;
+       rb_clear_buffer_page(cpu_buffer->head_page);
+       list_for_each_entry(page, cpu_buffer->pages, list) {
+               rb_clear_buffer_page(page);
+       }
 
        cpu_buffer->tail_page = cpu_buffer->head_page;
        cpu_buffer->commit_page = cpu_buffer->head_page;
 
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
        INIT_LIST_HEAD(&cpu_buffer->new_pages);
-       local_set(&cpu_buffer->reader_page->write, 0);
-       local_set(&cpu_buffer->reader_page->entries, 0);
-       local_set(&cpu_buffer->reader_page->page->commit, 0);
-       cpu_buffer->reader_page->read = 0;
+       rb_clear_buffer_page(cpu_buffer->reader_page);
 
        local_set(&cpu_buffer->entries_bytes, 0);
        local_set(&cpu_buffer->overrun, 0);
@@ -5289,6 +5299,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->last_overrun = 0;
 
        rb_head_page_activate(cpu_buffer);
+       cpu_buffer->pages_removed = 0;
 }
 
 /* Must have disabled the cpu buffer then done a synchronize_rcu */
@@ -5347,7 +5358,6 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 /**
  * ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer
  * @buffer: The ring buffer to reset a per cpu buffer of
- * @cpu: The CPU buffer to be reset
  */
 void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
 {
@@ -5535,6 +5545,15 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
        if (local_read(&cpu_buffer_b->committing))
                goto out_dec;
 
+       /*
+        * When a resize is in progress, we cannot swap buffers
+        * because it would mess up the state of the cpu buffer.
+        */
+       if (atomic_read(&buffer_a->resizing))
+               goto out_dec;
+       if (atomic_read(&buffer_b->resizing))
+               goto out_dec;
+
        buffer_a->buffers[cpu] = cpu_buffer_b;
        buffer_b->buffers[cpu] = cpu_buffer_a;
 
index 4529e26..8e64aaa 100644 (file)
@@ -1928,9 +1928,10 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
                 * place on this CPU. We fail to record, but we reset
                 * the max trace buffer (no one writes directly to it)
                 * and flag that it failed.
+                * Another reason to fail is that a resize is in progress.
                 */
                trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
-                       "Failed to swap buffers due to commit in progress\n");
+                       "Failed to swap buffers due to commit or resize in progress\n");
        }
 
        WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
@@ -3118,6 +3119,7 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
        struct ftrace_stack *fstack;
        struct stack_entry *entry;
        int stackidx;
+       void *ptr;
 
        /*
         * Add one, for this function and the call to save_stack_trace()
@@ -3161,9 +3163,25 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
                                    trace_ctx);
        if (!event)
                goto out;
-       entry = ring_buffer_event_data(event);
+       ptr = ring_buffer_event_data(event);
+       entry = ptr;
+
+       /*
+        * For backward compatibility reasons, the entry->caller is an
+        * array of 8 slots to store the stack. This is also exported
+        * to user space. The amount allocated on the ring buffer actually
+        * holds enough for the stack specified by nr_entries. This will
+        * go into the location of entry->caller. Due to string fortifiers
+        * checking the size of the destination of memcpy() it triggers
+        * when it detects that size is greater than 8. To hide this from
+        * the fortifiers, we use "ptr" and pointer arithmetic to assign caller.
+        *
+        * The below is really just:
+        *   memcpy(&entry->caller, fstack->calls, size);
+        */
+       ptr += offsetof(typeof(*entry), caller);
+       memcpy(ptr, fstack->calls, size);
 
-       memcpy(&entry->caller, fstack->calls, size);
        entry->size = nr_entries;
 
        if (!call_filter_check_discard(call, entry, buffer, event))
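
The comment in the hunk explains the trick; here is a standalone userspace sketch of the same fortify workaround. The struct, sizes, and values are illustrative.

    #include <stdio.h>
    #include <stdlib.h>
    #include <stddef.h>
    #include <string.h>

    struct stack_entry {
            int size;
            unsigned long caller[8];   /* declared as 8 slots for ABI reasons */
    };

    int main(void)
    {
            unsigned long calls[16];
            size_t bytes = sizeof(calls);
            struct stack_entry *entry;
            char *ptr;
            int i;

            for (i = 0; i < 16; i++)
                    calls[i] = 0x1000 + i;

            /* The allocation really holds 16 slots, more than declared. */
            entry = malloc(offsetof(struct stack_entry, caller) + bytes);

            /*
             * Equivalent of memcpy(&entry->caller, calls, bytes), but going
             * through a char pointer so a fortified memcpy() cannot compare
             * the copy size against the declared 8-slot destination.
             */
            ptr = (char *)entry + offsetof(struct stack_entry, caller);
            memcpy(ptr, calls, bytes);
            entry->size = 16;

            printf("slot 15 = %#lx\n", ((unsigned long *)ptr)[15]);
            free(entry);
            return 0;
    }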
@@ -4195,8 +4213,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
         * will point to the same string as current_trace->name.
         */
        mutex_lock(&trace_types_lock);
-       if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
+       if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+               /* Close iter->trace before switching to the new current tracer */
+               if (iter->trace->close)
+                       iter->trace->close(iter);
                *iter->trace = *tr->current_trace;
+               /* Reopen the new current tracer */
+               if (iter->trace->open)
+                       iter->trace->open(iter);
+       }
        mutex_unlock(&trace_types_lock);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -5259,11 +5284,17 @@ int tracing_set_cpumask(struct trace_array *tr,
                                !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
                        atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
                        ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+                       ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
+#endif
                }
                if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
                                cpumask_test_cpu(cpu, tracing_cpumask_new)) {
                        atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
                        ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+                       ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
+#endif
                }
        }
        arch_spin_unlock(&tr->max_lock);
@@ -6687,10 +6718,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
 
 #endif
 
+static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+       if (cpu == RING_BUFFER_ALL_CPUS) {
+               if (cpumask_empty(tr->pipe_cpumask)) {
+                       cpumask_setall(tr->pipe_cpumask);
+                       return 0;
+               }
+       } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
+               cpumask_set_cpu(cpu, tr->pipe_cpumask);
+               return 0;
+       }
+       return -EBUSY;
+}
+
+static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+       if (cpu == RING_BUFFER_ALL_CPUS) {
+               WARN_ON(!cpumask_full(tr->pipe_cpumask));
+               cpumask_clear(tr->pipe_cpumask);
+       } else {
+               WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
+               cpumask_clear_cpu(cpu, tr->pipe_cpumask);
+       }
+}
+
 static int tracing_open_pipe(struct inode *inode, struct file *filp)
 {
        struct trace_array *tr = inode->i_private;
        struct trace_iterator *iter;
+       int cpu;
        int ret;
 
        ret = tracing_check_open_get_tr(tr);
@@ -6698,13 +6755,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
                return ret;
 
        mutex_lock(&trace_types_lock);
+       cpu = tracing_get_cpu(inode);
+       ret = open_pipe_on_cpu(tr, cpu);
+       if (ret)
+               goto fail_pipe_on_cpu;
 
        /* create a buffer to store the information to pass to userspace */
        iter = kzalloc(sizeof(*iter), GFP_KERNEL);
        if (!iter) {
                ret = -ENOMEM;
-               __trace_array_put(tr);
-               goto out;
+               goto fail_alloc_iter;
        }
 
        trace_seq_init(&iter->seq);
@@ -6727,7 +6787,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
 
        iter->tr = tr;
        iter->array_buffer = &tr->array_buffer;
-       iter->cpu_file = tracing_get_cpu(inode);
+       iter->cpu_file = cpu;
        mutex_init(&iter->mutex);
        filp->private_data = iter;
 
@@ -6737,12 +6797,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
        nonseekable_open(inode, filp);
 
        tr->trace_ref++;
-out:
+
        mutex_unlock(&trace_types_lock);
        return ret;
 
 fail:
        kfree(iter);
+fail_alloc_iter:
+       close_pipe_on_cpu(tr, cpu);
+fail_pipe_on_cpu:
        __trace_array_put(tr);
        mutex_unlock(&trace_types_lock);
        return ret;
@@ -6759,11 +6822,12 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
 
        if (iter->trace->pipe_close)
                iter->trace->pipe_close(iter);
-
+       close_pipe_on_cpu(tr, iter->cpu_file);
        mutex_unlock(&trace_types_lock);
 
        free_cpumask_var(iter->started);
        kfree(iter->fmt);
+       kfree(iter->temp);
        mutex_destroy(&iter->mutex);
        kfree(iter);
 
@@ -9422,6 +9486,9 @@ static struct trace_array *trace_array_create(const char *name)
        if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
                goto out_free_tr;
 
+       if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
+               goto out_free_tr;
+
        tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
 
        cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9463,6 +9530,7 @@ static struct trace_array *trace_array_create(const char *name)
  out_free_tr:
        ftrace_free_ftrace_ops(tr);
        free_trace_buffers(tr);
+       free_cpumask_var(tr->pipe_cpumask);
        free_cpumask_var(tr->tracing_cpumask);
        kfree(tr->name);
        kfree(tr);
@@ -9565,6 +9633,7 @@ static int __remove_instance(struct trace_array *tr)
        }
        kfree(tr->topts);
 
+       free_cpumask_var(tr->pipe_cpumask);
        free_cpumask_var(tr->tracing_cpumask);
        kfree(tr->name);
        kfree(tr);
@@ -10362,12 +10431,14 @@ __init static int tracer_alloc_buffers(void)
        if (trace_create_savedcmd() < 0)
                goto out_free_temp_buffer;
 
+       if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
+               goto out_free_savedcmd;
+
        /* TODO: make the number of buffers hot pluggable with CPUS */
        if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
                MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
-               goto out_free_savedcmd;
+               goto out_free_pipe_cpumask;
        }
-
        if (global_trace.buffer_disabled)
                tracing_off();
 
@@ -10420,6 +10491,8 @@ __init static int tracer_alloc_buffers(void)
 
        return 0;
 
+out_free_pipe_cpumask:
+       free_cpumask_var(global_trace.pipe_cpumask);
 out_free_savedcmd:
        free_saved_cmdlines_buffer(savedcmd);
 out_free_temp_buffer:
index ed7906b..73eaec1 100644 (file)
@@ -113,6 +113,8 @@ enum trace_type {
 #define MEM_FAIL(condition, fmt, ...)                                  \
        DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__)
 
+#define FAULT_STRING "(fault)"
+
 #define HIST_STACKTRACE_DEPTH  16
 #define HIST_STACKTRACE_SIZE   (HIST_STACKTRACE_DEPTH * sizeof(unsigned long))
 #define HIST_STACKTRACE_SKIP   5
@@ -375,6 +377,8 @@ struct trace_array {
        struct list_head        events;
        struct trace_event_file *trace_marker_file;
        cpumask_var_t           tracing_cpumask; /* only trace on set CPUs */
+       /* each per_cpu trace_pipe can be opened by only one user at a time */
+       cpumask_var_t           pipe_cpumask;
        int                     ref;
        int                     trace_ref;
 #ifdef CONFIG_FUNCTION_TRACER
@@ -1293,6 +1297,14 @@ static inline void trace_branch_disable(void)
 /* set ring buffers to default size if not already done so */
 int tracing_update_buffers(void);
 
+union trace_synth_field {
+       u8                              as_u8;
+       u16                             as_u16;
+       u32                             as_u32;
+       u64                             as_u64;
+       struct trace_dynamic_info       as_dynamic;
+};
+
 struct ftrace_event_field {
        struct list_head        link;
        const char              *name;
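
The union above is what lets later hunks replace casts such as *(u8 *)&entry->fields[n] with entry->fields[n].as_u8: every field still occupies one 64-bit slot, but accesses go through a member of the right width. A userspace sketch, with stdint types standing in for the kernel's u8/u16/u32/u64 and the dynamic member modelled as a plain offset/len pair:

    #include <stdio.h>
    #include <stdint.h>

    union synth_field {
            uint8_t  as_u8;
            uint16_t as_u16;
            uint32_t as_u32;
            uint64_t as_u64;
            struct {
                    uint32_t offset;  /* where the payload lives */
                    uint32_t len;     /* how long it is */
            } as_dynamic;
    };

    int main(void)
    {
            union synth_field f;

            f.as_u64 = 0;
            f.as_u16 = 0xbeef;        /* store a 2-byte field in the slot */
            printf("u16 read: %#x\n", f.as_u16);

            f.as_dynamic.offset = 64; /* store an offset/len combo instead */
            f.as_dynamic.len = 24;
            printf("dyn: off=%u len=%u\n",
                   f.as_dynamic.offset, f.as_dynamic.len);
            return 0;
    }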
index cb0077b..a0a704b 100644 (file)
@@ -644,6 +644,7 @@ static int enable_trace_eprobe(struct trace_event_call *call,
        struct trace_eprobe *ep;
        bool enabled;
        int ret = 0;
+       int cnt = 0;
 
        tp = trace_probe_primary_from_call(call);
        if (WARN_ON_ONCE(!tp))
@@ -667,12 +668,25 @@ static int enable_trace_eprobe(struct trace_event_call *call,
                if (ret)
                        break;
                enabled = true;
+               cnt++;
        }
 
        if (ret) {
                /* Failed to enable one of them. Roll back all */
-               if (enabled)
-                       disable_eprobe(ep, file->tr);
+               if (enabled) {
+                       /*
+                        * It's a bug if one eprobe failed for something other
+                        * than memory being unavailable while another succeeded.
+                        */
+                       WARN_ON_ONCE(ret != -ENOMEM);
+
+                       list_for_each_entry(pos, trace_probe_probe_list(tp), list) {
+                               ep = container_of(pos, struct trace_eprobe, tp);
+                               disable_eprobe(ep, file->tr);
+                               if (!--cnt)
+                                       break;
+                       }
+               }
                if (file)
                        trace_probe_remove_file(tp, file);
                else
index 5d6ae4e..578f1f7 100644 (file)
@@ -611,7 +611,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
 {
        struct trace_event_call *call = file->event_call;
        struct trace_array *tr = file->tr;
-       unsigned long file_flags = file->flags;
        int ret = 0;
        int disable;
 
@@ -635,6 +634,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
                                break;
                        disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
                        clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+                       /* Disable use of trace_buffered_event */
+                       trace_buffered_event_disable();
                } else
                        disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
 
@@ -673,6 +674,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
                        if (atomic_inc_return(&file->sm_ref) > 1)
                                break;
                        set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+                       /* Enable use of trace_buffered_event */
+                       trace_buffered_event_enable();
                }
 
                if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
@@ -712,15 +715,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
                break;
        }
 
-       /* Enable or disable use of trace_buffered_event */
-       if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
-           (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
-               if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
-                       trace_buffered_event_enable();
-               else
-                       trace_buffered_event_disable();
-       }
-
        return ret;
 }
 
index b97d3ad..d06938a 100644 (file)
@@ -6663,13 +6663,16 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
        if (get_named_trigger_data(trigger_data))
                goto enable;
 
-       if (has_hist_vars(hist_data))
-               save_hist_vars(hist_data);
-
        ret = create_actions(hist_data);
        if (ret)
                goto out_unreg;
 
+       if (has_hist_vars(hist_data) || hist_data->n_var_refs) {
+               ret = save_hist_vars(hist_data);
+               if (ret)
+                       goto out_unreg;
+       }
+
        ret = tracing_map_init(hist_data->map);
        if (ret)
                goto out_unreg;
index d6a70af..9897d0b 100644 (file)
@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event,
 
 struct synth_trace_event {
        struct trace_entry      ent;
-       u64                     fields[];
+       union trace_synth_field fields[];
 };
 
 static int synth_event_define_fields(struct trace_event_call *call)
@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type)
 
 static void print_synth_event_num_val(struct trace_seq *s,
                                      char *print_fmt, char *name,
-                                     int size, u64 val, char *space)
+                                     int size, union trace_synth_field *val, char *space)
 {
        switch (size) {
        case 1:
-               trace_seq_printf(s, print_fmt, name, (u8)val, space);
+               trace_seq_printf(s, print_fmt, name, val->as_u8, space);
                break;
 
        case 2:
-               trace_seq_printf(s, print_fmt, name, (u16)val, space);
+               trace_seq_printf(s, print_fmt, name, val->as_u16, space);
                break;
 
        case 4:
-               trace_seq_printf(s, print_fmt, name, (u32)val, space);
+               trace_seq_printf(s, print_fmt, name, val->as_u32, space);
                break;
 
        default:
@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
        struct trace_seq *s = &iter->seq;
        struct synth_trace_event *entry;
        struct synth_event *se;
-       unsigned int i, n_u64;
+       unsigned int i, j, n_u64;
        char print_fmt[32];
        const char *fmt;
 
@@ -374,43 +374,28 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
                /* parameter values */
                if (se->fields[i]->is_string) {
                        if (se->fields[i]->is_dynamic) {
-                               u32 offset, data_offset;
-                               char *str_field;
-
-                               offset = (u32)entry->fields[n_u64];
-                               data_offset = offset & 0xffff;
-
-                               str_field = (char *)entry + data_offset;
+                               union trace_synth_field *data = &entry->fields[n_u64];
 
                                trace_seq_printf(s, print_fmt, se->fields[i]->name,
                                                 STR_VAR_LEN_MAX,
-                                                str_field,
+                                                (char *)entry + data->as_dynamic.offset,
                                                 i == se->n_fields - 1 ? "" : " ");
                                n_u64++;
                        } else {
                                trace_seq_printf(s, print_fmt, se->fields[i]->name,
                                                 STR_VAR_LEN_MAX,
-                                                (char *)&entry->fields[n_u64],
+                                                (char *)&entry->fields[n_u64].as_u64,
                                                 i == se->n_fields - 1 ? "" : " ");
                                n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
                        }
                } else if (se->fields[i]->is_stack) {
-                       u32 offset, data_offset, len;
-                       unsigned long *p, *end;
-
-                       offset = (u32)entry->fields[n_u64];
-                       data_offset = offset & 0xffff;
-                       len = offset >> 16;
-
-                       p = (void *)entry + data_offset;
-                       end = (void *)p + len - (sizeof(long) - 1);
+                       union trace_synth_field *data = &entry->fields[n_u64];
+                       unsigned long *p = (void *)entry + data->as_dynamic.offset;
 
                        trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
-
-                       for (; *p && p < end; p++)
-                               trace_seq_printf(s, "=> %pS\n", (void *)*p);
+                       for (j = 1; j < data->as_dynamic.len / sizeof(long); j++)
+                               trace_seq_printf(s, "=> %pS\n", (void *)p[j]);
                        n_u64++;
-
                } else {
                        struct trace_print_flags __flags[] = {
                            __def_gfpflag_names, {-1, NULL} };
@@ -419,13 +404,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
                        print_synth_event_num_val(s, print_fmt,
                                                  se->fields[i]->name,
                                                  se->fields[i]->size,
-                                                 entry->fields[n_u64],
+                                                 &entry->fields[n_u64],
                                                  space);
 
                        if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
                                trace_seq_puts(s, " (");
                                trace_print_flags_seq(s, "|",
-                                                     entry->fields[n_u64],
+                                                     entry->fields[n_u64].as_u64,
                                                      __flags);
                                trace_seq_putc(s, ')');
                        }
@@ -454,21 +439,16 @@ static unsigned int trace_string(struct synth_trace_event *entry,
        int ret;
 
        if (is_dynamic) {
-               u32 data_offset;
+               union trace_synth_field *data = &entry->fields[*n_u64];
 
-               data_offset = struct_size(entry, fields, event->n_u64);
-               data_offset += data_size;
-
-               len = fetch_store_strlen((unsigned long)str_val);
-
-               data_offset |= len << 16;
-               *(u32 *)&entry->fields[*n_u64] = data_offset;
+               data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
+               data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
 
                ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
 
                (*n_u64)++;
        } else {
-               str_field = (char *)&entry->fields[*n_u64];
+               str_field = (char *)&entry->fields[*n_u64].as_u64;
 
 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
                if ((unsigned long)str_val < TASK_SIZE)
@@ -492,6 +472,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
                                 unsigned int data_size,
                                 unsigned int *n_u64)
 {
+       union trace_synth_field *data = &entry->fields[*n_u64];
        unsigned int len;
        u32 data_offset;
        void *data_loc;
@@ -504,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
                        break;
        }
 
-       /* Include the zero'd element if it fits */
-       if (len < HIST_STACKTRACE_DEPTH)
-               len++;
-
        len *= sizeof(long);
 
        /* Find the dynamic section to copy the stack into. */
@@ -515,8 +492,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
        memcpy(data_loc, stack, len);
 
        /* Fill in the field that holds the offset/len combo */
-       data_offset |= len << 16;
-       *(u32 *)&entry->fields[*n_u64] = data_offset;
+
+       data->as_dynamic.offset = data_offset;
+       data->as_dynamic.len = len;
 
        (*n_u64)++;
 
@@ -550,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
                str_val = (char *)(long)var_ref_vals[val_idx];
 
                if (event->dynamic_fields[i]->is_stack) {
-                       len = *((unsigned long *)str_val);
+                       /* reserve one extra element for size */
+                       len = *((unsigned long *)str_val) + 1;
                        len *= sizeof(unsigned long);
                } else {
                        len = fetch_store_strlen((unsigned long)str_val);
@@ -592,19 +571,19 @@ static notrace void trace_event_raw_event_synth(void *__data,
 
                        switch (field->size) {
                        case 1:
-                               *(u8 *)&entry->fields[n_u64] = (u8)val;
+                               entry->fields[n_u64].as_u8 = (u8)val;
                                break;
 
                        case 2:
-                               *(u16 *)&entry->fields[n_u64] = (u16)val;
+                               entry->fields[n_u64].as_u16 = (u16)val;
                                break;
 
                        case 4:
-                               *(u32 *)&entry->fields[n_u64] = (u32)val;
+                               entry->fields[n_u64].as_u32 = (u32)val;
                                break;
 
                        default:
-                               entry->fields[n_u64] = val;
+                               entry->fields[n_u64].as_u64 = val;
                                break;
                        }
                        n_u64++;
@@ -1230,6 +1209,7 @@ EXPORT_SYMBOL_GPL(__synth_event_gen_cmd_start);
  * synth_event_gen_cmd_array_start - Start synthetic event command from an array
  * @cmd: A pointer to the dynevent_cmd struct representing the new event
  * @name: The name of the synthetic event
+ * @mod: The module creating the event, NULL if not created from a module
  * @fields: An array of type/name field descriptions
  * @n_fields: The number of field descriptions contained in the fields array
  *
@@ -1790,19 +1770,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
 
                        switch (field->size) {
                        case 1:
-                               *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+                               state.entry->fields[n_u64].as_u8 = (u8)val;
                                break;
 
                        case 2:
-                               *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+                               state.entry->fields[n_u64].as_u16 = (u16)val;
                                break;
 
                        case 4:
-                               *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+                               state.entry->fields[n_u64].as_u32 = (u32)val;
                                break;
 
                        default:
-                               state.entry->fields[n_u64] = val;
+                               state.entry->fields[n_u64].as_u64 = val;
                                break;
                        }
                        n_u64++;
@@ -1883,19 +1863,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
 
                        switch (field->size) {
                        case 1:
-                               *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+                               state.entry->fields[n_u64].as_u8 = (u8)val;
                                break;
 
                        case 2:
-                               *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+                               state.entry->fields[n_u64].as_u16 = (u16)val;
                                break;
 
                        case 4:
-                               *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+                               state.entry->fields[n_u64].as_u32 = (u32)val;
                                break;
 
                        default:
-                               state.entry->fields[n_u64] = val;
+                               state.entry->fields[n_u64].as_u64 = val;
                                break;
                        }
                        n_u64++;
@@ -2030,19 +2010,19 @@ static int __synth_event_add_val(const char *field_name, u64 val,
        } else {
                switch (field->size) {
                case 1:
-                       *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
+                       trace_state->entry->fields[field->offset].as_u8 = (u8)val;
                        break;
 
                case 2:
-                       *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
+                       trace_state->entry->fields[field->offset].as_u16 = (u16)val;
                        break;
 
                case 4:
-                       *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
+                       trace_state->entry->fields[field->offset].as_u32 = (u32)val;
                        break;
 
                default:
-                       trace_state->entry->fields[field->offset] = val;
+                       trace_state->entry->fields[field->offset].as_u64 = val;
                        break;
                }
        }
index e535959..46439e3 100644 (file)
@@ -31,7 +31,9 @@ void trigger_data_free(struct event_trigger_data *data)
 /**
  * event_triggers_call - Call triggers associated with a trace event
  * @file: The trace_event_file associated with the event
+ * @buffer: The ring buffer that the event is being written to
  * @rec: The trace entry for the event, NULL for unconditional invocation
+ * @event: The event meta data in the ring buffer
  *
  * For each trigger associated with an event, invoke the trigger
  * function registered with the associated trigger command.  If rec is
index 4f5e74b..33cb6af 100644 (file)
@@ -1317,6 +1317,9 @@ static int user_field_set_string(struct ftrace_event_field *field,
        pos += snprintf(buf + pos, LEN_OR_ZERO, " ");
        pos += snprintf(buf + pos, LEN_OR_ZERO, "%s", field->name);
 
+       if (str_has_prefix(field->type, "struct "))
+               pos += snprintf(buf + pos, LEN_OR_ZERO, " %d", field->size);
+
        if (colon)
                pos += snprintf(buf + pos, LEN_OR_ZERO, ";");
 
index 590b3d5..ba37f76 100644 (file)
@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
 {
        if (is_graph(iter->tr))
                graph_trace_open(iter);
-
+       else
+               iter->private = NULL;
 }
 
 static void irqsoff_trace_close(struct trace_iterator *iter)
index 16548ee..3851cd1 100644 (file)
@@ -1,4 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
+
+#include "trace_kprobe_selftest.h"
+
 /*
  * Function used during the kprobe self test. This function is in a separate
 * compile unit so it can be compiled with CC_FLAGS_FTRACE to ensure that it
index 7ba371d..c68a727 100644 (file)
@@ -67,7 +67,7 @@ int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, void *data, void *ent)
        int len = *(u32 *)data >> 16;
 
        if (!len)
-               trace_seq_puts(s, "(fault)");
+               trace_seq_puts(s, FAULT_STRING);
        else
                trace_seq_printf(s, "\"%s\"",
                                 (const char *)get_loc_data(data, ent));
@@ -386,12 +386,12 @@ static const struct btf_type *find_btf_func_proto(const char *funcname)
 
        /* Get BTF_KIND_FUNC type */
        t = btf_type_by_id(btf, id);
-       if (!btf_type_is_func(t))
+       if (!t || !btf_type_is_func(t))
                return ERR_PTR(-ENOENT);
 
        /* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */
        t = btf_type_by_id(btf, t->type);
-       if (!btf_type_is_func_proto(t))
+       if (!t || !btf_type_is_func_proto(t))
                return ERR_PTR(-ENOENT);
 
        return t;
@@ -443,7 +443,7 @@ static int parse_btf_arg(const char *varname, struct fetch_insn *code,
        if (!ctx->params) {
                params = find_btf_func_param(ctx->funcname, &ctx->nr_params,
                                             ctx->flags & TPARG_FL_TPOINT);
-               if (IS_ERR(params)) {
+               if (IS_ERR_OR_NULL(params)) {
                        trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
                        return PTR_ERR(params);
                }
@@ -1273,7 +1273,7 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
 
        params = find_btf_func_param(ctx->funcname, &nr_params,
                                     ctx->flags & TPARG_FL_TPOINT);
-       if (IS_ERR(params)) {
+       if (IS_ERR_OR_NULL(params)) {
                if (args_idx != -1) {
                        /* $arg* requires BTF info */
                        trace_probe_log_err(0, NOSUP_BTFARG);
index c4e1d4c..bb723ee 100644 (file)
@@ -2,8 +2,6 @@
 #ifndef __TRACE_PROBE_KERNEL_H_
 #define __TRACE_PROBE_KERNEL_H_
 
-#define FAULT_STRING "(fault)"
-
 /*
 * This depends on trace_probe.h, but cannot include it due to
  * the way trace_probe_tmpl.h is used by trace_kprobe.c and trace_eprobe.c.
@@ -15,16 +13,8 @@ static nokprobe_inline int
 fetch_store_strlen_user(unsigned long addr)
 {
        const void __user *uaddr =  (__force const void __user *)addr;
-       int ret;
 
-       ret = strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
-       /*
-        * strnlen_user_nofault returns zero on fault, insert the
-        * FAULT_STRING when that occurs.
-        */
-       if (ret <= 0)
-               return strlen(FAULT_STRING) + 1;
-       return ret;
+       return strnlen_user_nofault(uaddr, MAX_STRING_SIZE);
 }
 
 /* Return the length of string -- including null terminal byte */
@@ -44,18 +34,14 @@ fetch_store_strlen(unsigned long addr)
                len++;
        } while (c && ret == 0 && len < MAX_STRING_SIZE);
 
-       /* For faults, return enough to hold the FAULT_STRING */
-       return (ret < 0) ? strlen(FAULT_STRING) + 1 : len;
+       return (ret < 0) ? ret : len;
 }
 
-static nokprobe_inline void set_data_loc(int ret, void *dest, void *__dest, void *base, int len)
+static nokprobe_inline void set_data_loc(int ret, void *dest, void *__dest, void *base)
 {
-       if (ret >= 0) {
-               *(u32 *)dest = make_data_loc(ret, __dest - base);
-       } else {
-               strscpy(__dest, FAULT_STRING, len);
-               ret = strlen(__dest) + 1;
-       }
+       if (ret < 0)
+               ret = 0;
+       *(u32 *)dest = make_data_loc(ret, __dest - base);
 }
 
 /*
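
set_data_loc() above now records a fault as a zero-length string instead of copying in FAULT_STRING. The u32 it fills packs the length in the high 16 bits and the offset in the low 16. A userspace re-derivation of that packing (this make_data_loc() mimics the kernel macro, it is not the kernel's definition):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t make_data_loc(int len, int offset)
    {
            return ((uint32_t)len << 16) | (offset & 0xffff);
    }

    int main(void)
    {
            int ret = -14;             /* pretend the copy faulted */
            uint32_t loc;

            if (ret < 0)
                    ret = 0;           /* fault -> zero-length string */
            loc = make_data_loc(ret, 40);

            printf("len=%u offset=%u\n", loc >> 16, loc & 0xffff);
            return 0;
    }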
@@ -76,7 +62,7 @@ fetch_store_string_user(unsigned long addr, void *dest, void *base)
        __dest = get_loc_data(dest, base);
 
        ret = strncpy_from_user_nofault(__dest, uaddr, maxlen);
-       set_data_loc(ret, dest, __dest, base, maxlen);
+       set_data_loc(ret, dest, __dest, base);
 
        return ret;
 }
@@ -107,7 +93,7 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
         * probing.
         */
        ret = strncpy_from_kernel_nofault(__dest, (void *)addr, maxlen);
-       set_data_loc(ret, dest, __dest, base, maxlen);
+       set_data_loc(ret, dest, __dest, base);
 
        return ret;
 }
index 0070763..3935b34 100644 (file)
@@ -156,11 +156,11 @@ stage3:
                        code++;
                        goto array;
                case FETCH_OP_ST_USTRING:
-                       ret += fetch_store_strlen_user(val + code->offset);
+                       ret = fetch_store_strlen_user(val + code->offset);
                        code++;
                        goto array;
                case FETCH_OP_ST_SYMSTR:
-                       ret += fetch_store_symstrlen(val + code->offset);
+                       ret = fetch_store_symstrlen(val + code->offset);
                        code++;
                        goto array;
                default:
@@ -204,6 +204,8 @@ stage3:
 array:
        /* the last stage: Loop on array */
        if (code->op == FETCH_OP_LP_ARRAY) {
+               if (ret < 0)
+                       ret = 0;
                total += ret;
                if (++i < code->param) {
                        code = s3;
@@ -265,9 +267,7 @@ store_trace_args(void *data, struct trace_probe *tp, void *rec,
                if (unlikely(arg->dynamic))
                        *dl = make_data_loc(maxlen, dyndata - base);
                ret = process_fetch_insn(arg->code, rec, dl, base);
-               if (unlikely(ret < 0 && arg->dynamic)) {
-                       *dl = make_data_loc(0, dyndata - base);
-               } else {
+               if (arg->dynamic && likely(ret > 0)) {
                        dyndata += ret;
                        maxlen -= ret;
                }
index 330aee1..0469a04 100644 (file)
@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
 {
        if (is_graph(iter->tr))
                graph_trace_open(iter);
+       else
+               iter->private = NULL;
 }
 
 static void wakeup_trace_close(struct trace_iterator *iter)
index e5e2992..bac06ee 100644 (file)
@@ -131,6 +131,7 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask);
  * trace_seq_vprintf - sequence printing of trace information
  * @s: trace sequence descriptor
  * @fmt: printf format string
+ * @args: Arguments for the format string
  *
  * The tracer may use either sequence operations or its own
  * copy to user routines. To simplify formatting of a trace
index fa09b33..688bf57 100644 (file)
@@ -170,7 +170,8 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
                         */
                        ret++;
                *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
-       }
+       } else
+               *(u32 *)dest = make_data_loc(0, (void *)dst - base);
 
        return ret;
 }
index 2c765ee..99c37ee 100644 (file)
@@ -272,10 +272,6 @@ extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
 extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
 extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
 
-extern void tracing_map_set_field_descr(struct tracing_map *map,
-                                       unsigned int i,
-                                       unsigned int key_offset,
-                                       tracing_map_cmp_fn_t cmp_fn);
 extern int
 tracing_map_sort_entries(struct tracing_map *map,
                         struct tracing_map_sort_key *sort_keys,
index 02a8f40..800b420 100644 (file)
@@ -52,6 +52,7 @@
 #include <linux/sched/debug.h>
 #include <linux/nmi.h>
 #include <linux/kvm_para.h>
+#include <linux/delay.h>
 
 #include "workqueue_internal.h"
 
@@ -338,8 +339,10 @@ static cpumask_var_t *wq_numa_possible_cpumask;
  * Per-cpu work items which run for longer than the following threshold are
  * automatically considered CPU intensive and excluded from concurrency
  * management to prevent them from noticeably delaying other per-cpu work items.
+ * ULONG_MAX indicates that the user hasn't overridden it with a boot parameter.
+ * The actual value is initialized in wq_cpu_intensive_thresh_init().
  */
-static unsigned long wq_cpu_intensive_thresh_us = 10000;
+static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
 module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
 
 static bool wq_disable_numa;
@@ -6513,6 +6516,42 @@ void __init workqueue_init_early(void)
               !system_freezable_power_efficient_wq);
 }
 
+static void __init wq_cpu_intensive_thresh_init(void)
+{
+       unsigned long thresh;
+       unsigned long bogo;
+
+       /* if the user set it to a specific value, keep it */
+       if (wq_cpu_intensive_thresh_us != ULONG_MAX)
+               return;
+
+       /*
+        * The default of 10ms is derived from the fact that most modern (as of
+        * 2023) processors can do a lot in 10ms and that it's just below what
+        * most consider human-perceivable. However, the kernel also runs on
+        * much slower CPUs, including microcontrollers, where the threshold
+        * is way too low.
+        *
+        * Let's scale the threshold up to 1 second if BogoMIPS is below 4000.
+        * This is by no means accurate but it doesn't have to be. The
+        * mechanism is still useful even when the threshold is fully scaled
+        * up. Also, as the reports are usually applicable to everyone, a few
+        * machines operating with longer thresholds won't significantly
+        * diminish their usefulness.
+        */
+       thresh = 10 * USEC_PER_MSEC;
+
+       /* see init/calibrate.c for lpj -> BogoMIPS calculation */
+       bogo = max_t(unsigned long, loops_per_jiffy / 500000 * HZ, 1);
+       if (bogo < 4000)
+               thresh = min_t(unsigned long, thresh * 4000 / bogo, USEC_PER_SEC);
+
+       pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n",
+                loops_per_jiffy, bogo, thresh);
+
+       wq_cpu_intensive_thresh_us = thresh;
+}
+
 /**
  * workqueue_init - bring workqueue subsystem fully online
  *
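
The arithmetic in wq_cpu_intensive_thresh_init() can be checked in isolation: start at 10ms, scale by 4000/BogoMIPS once BogoMIPS drops below 4000, and clamp at one second. For example, at 1000 BogoMIPS the threshold becomes 10ms * 4000/1000 = 40ms. A standalone sketch of the same computation:

    #include <stdio.h>

    #define USEC_PER_MSEC  1000UL
    #define USEC_PER_SEC   1000000UL

    static unsigned long scale_thresh(unsigned long bogo)
    {
            unsigned long thresh = 10 * USEC_PER_MSEC;

            if (bogo < 4000) {
                    thresh = thresh * 4000 / bogo;
                    if (thresh > USEC_PER_SEC)   /* clamp at one second */
                            thresh = USEC_PER_SEC;
            }
            return thresh;
    }

    int main(void)
    {
            unsigned long bogos[] = { 8000, 4000, 1000, 25 };

            for (int i = 0; i < 4; i++)
                    printf("BogoMIPS %5lu -> %7lu us\n",
                           bogos[i], scale_thresh(bogos[i]));
            return 0;
    }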
@@ -6528,6 +6567,8 @@ void __init workqueue_init(void)
        struct worker_pool *pool;
        int cpu, bkt;
 
+       wq_cpu_intensive_thresh_init();
+
        /*
         * It'd be simpler to initialize NUMA in workqueue_init_early() but
         * CPU to node mapping may not be available that early on some
index fbc89ba..73a0c8e 100644 (file)
@@ -1200,7 +1200,7 @@ config WQ_CPU_INTENSIVE_REPORT
        help
          Say Y here to enable reporting of concurrency-managed per-cpu work
          items that hog CPUs for longer than
-         workqueue.cpu_intensive_threshold_us. Workqueue automatically
+         workqueue.cpu_intensive_thresh_us. Workqueue automatically
          detects and excludes them from concurrency management to prevent
          them from stalling other per-cpu work items. Occasional
          triggering may not necessarily indicate a problem. Repeated
@@ -1673,10 +1673,15 @@ menu "Debug kernel data structures"
 
 config DEBUG_LIST
        bool "Debug linked list manipulation"
-       depends on DEBUG_KERNEL || BUG_ON_DATA_CORRUPTION
+       depends on DEBUG_KERNEL
+       select LIST_HARDENED
        help
-         Enable this to turn on extended checks in the linked-list
-         walking routines.
+         Enable this to turn on extended checks in the linked-list walking
+         routines.
+
+         This option trades performance for better quality error reports and
+         is more suitable for kernel debugging. If you care about performance,
+         you should only enable CONFIG_LIST_HARDENED instead.
 
          If unsure, say N.
 
@@ -1710,16 +1715,6 @@ config DEBUG_NOTIFIERS
          This is a relatively cheap check but if you care about maximum
          performance, say N.
 
-config BUG_ON_DATA_CORRUPTION
-       bool "Trigger a BUG when data corruption is detected"
-       select DEBUG_LIST
-       help
-         Select this option if the kernel should BUG when it encounters
-         data corruption in kernel memory structures when they get checked
-         for validity.
-
-         If unsure, say N.
-
 config DEBUG_MAPLE_TREE
        bool "Debug maple trees"
        depends on DEBUG_KERNEL
index efae7e0..59e21bf 100644 (file)
@@ -13,7 +13,7 @@ menuconfig UBSAN
 if UBSAN
 
 config UBSAN_TRAP
-       bool "On Sanitizer warnings, abort the running kernel code"
+       bool "Abort on Sanitizer warnings (smaller kernel but less verbose)"
        depends on !COMPILE_TEST
        help
          Building kernels with Sanitizer features enabled tends to grow
@@ -26,6 +26,14 @@ config UBSAN_TRAP
          the system. For some system builders this is an acceptable
          trade-off.
 
+         Also note that selecting Y will cause your kernel to Oops
+         with an "illegal instruction" error with no further details
+         when a UBSAN violation occurs. (Except on arm64, which will
+         report which Sanitizer failed.) This may make it hard to
+         determine whether an Oops was caused by UBSAN or to figure
+         out the details of a UBSAN violation. It makes the kernel log
+         output less useful for bug reports.
+
 config CC_HAS_UBSAN_BOUNDS_STRICT
        def_bool $(cc-option,-fsanitize=bounds-strict)
        help
index 42d307a..d139778 100644 (file)
@@ -82,7 +82,13 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_SCANF) += test_scanf.o
+
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
+ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy)
+# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled
+GCOV_PROFILE_test_bitmap.o := n
+endif
+
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
@@ -161,7 +167,7 @@ obj-$(CONFIG_BTREE) += btree.o
 obj-$(CONFIG_INTERVAL_TREE) += interval_tree.o
 obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
-obj-$(CONFIG_DEBUG_LIST) += list_debug.o
+obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
index 0d3a686..fb8c0c5 100644 (file)
@@ -28,36 +28,16 @@ int __weak __clzsi2(int val)
 }
 EXPORT_SYMBOL(__clzsi2);
 
-int __weak __clzdi2(long val);
-int __weak __ctzdi2(long val);
-#if BITS_PER_LONG == 32
-
-int __weak __clzdi2(long val)
+int __weak __clzdi2(u64 val);
+int __weak __clzdi2(u64 val)
 {
-       return 32 - fls((int)val);
+       return 64 - fls64(val);
 }
 EXPORT_SYMBOL(__clzdi2);
 
-int __weak __ctzdi2(long val)
+int __weak __ctzdi2(u64 val);
+int __weak __ctzdi2(u64 val)
 {
-       return __ffs((u32)val);
+       return __ffs64(val);
 }
 EXPORT_SYMBOL(__ctzdi2);
-
-#elif BITS_PER_LONG == 64
-
-int __weak __clzdi2(long val)
-{
-       return 64 - fls64((u64)val);
-}
-EXPORT_SYMBOL(__clzdi2);
-
-int __weak __ctzdi2(long val)
-{
-       return __ffs64((u64)val);
-}
-EXPORT_SYMBOL(__ctzdi2);
-
-#else
-#error BITS_PER_LONG not 32 or 64
-#endif
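
Both helpers now take a u64 regardless of BITS_PER_LONG. A userspace sketch of their contract, using naive bit loops in place of fls64()/__ffs64(); as with the libgcc originals, the ctz argument must be non-zero:

    #include <stdio.h>
    #include <stdint.h>

    static int clzdi2(uint64_t val)   /* leading zeros: 64 - fls64(val) */
    {
            int n = 64;

            while (val) {             /* drop bits until none remain */
                    val >>= 1;
                    n--;
            }
            return n;
    }

    static int ctzdi2(uint64_t val)   /* trailing zeros: __ffs64(val) */
    {
            int n = 0;

            while (!(val & 1)) {      /* val must be non-zero */
                    val >>= 1;
                    n++;
            }
            return n;
    }

    int main(void)
    {
            printf("clz(0x00F0000000000000) = %d\n",
                   clzdi2(0x00F0000000000000ULL));   /* 8 */
            printf("ctz(0x0000000000000400) = %d\n",
                   ctzdi2(0x400ULL));                /* 10 */
            return 0;
    }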
index de356f1..a7fd02b 100644 (file)
@@ -45,6 +45,7 @@ EXPORT_SYMBOL(cpumask_next_wrap);
  * alloc_cpumask_var_node - allocate a struct cpumask on a given node
  * @mask: pointer to cpumask_var_t where the cpumask is returned
  * @flags: GFP_ flags
+ * @node: memory node from which to allocate or %NUMA_NO_NODE
  *
  * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
  * a nop returning a constant 1 (in <linux/cpumask.h>)
@@ -157,7 +158,9 @@ EXPORT_SYMBOL(cpumask_local_spread);
 static DEFINE_PER_CPU(int, distribute_cpu_mask_prev);
 
 /**
- * cpumask_any_and_distribute - Return an arbitrary cpu within srcp1 & srcp2.
+ * cpumask_any_and_distribute - Return an arbitrary cpu within src1p & src2p.
+ * @src1p: first &cpumask for intersection
+ * @src2p: second &cpumask for intersection
  *
 * Iterated calls using the same src1p and src2p will be distributed within
  * their intersection.
index 0c883d6..6c644f9 100644 (file)
@@ -895,7 +895,7 @@ struct gen_pool *of_gen_pool_get(struct device_node *np,
 
                of_property_read_string(np_pool, "label", &name);
                if (!name)
-                       name = np_pool->name;
+                       name = of_node_full_name(np_pool);
        }
        if (pdev)
                pool = gen_pool_get(&pdev->dev, name);
index b667b1e..4247370 100644 (file)
@@ -566,24 +566,37 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
-                                 struct iov_iter *i)
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
+               size_t bytes, struct iov_iter *i)
 {
-       char *kaddr = kmap_atomic(page), *p = kaddr + offset;
-       if (!page_copy_sane(page, offset, bytes)) {
-               kunmap_atomic(kaddr);
+       size_t n, copied = 0;
+
+       if (!page_copy_sane(page, offset, bytes))
                return 0;
-       }
-       if (WARN_ON_ONCE(!i->data_source)) {
-               kunmap_atomic(kaddr);
+       if (WARN_ON_ONCE(!i->data_source))
                return 0;
-       }
-       iterate_and_advance(i, bytes, base, len, off,
-               copyin(p + off, base, len),
-               memcpy_from_iter(i, p + off, base, len)
-       )
-       kunmap_atomic(kaddr);
-       return bytes;
+
+       do {
+               char *p;
+
+               n = bytes - copied;
+               if (PageHighMem(page)) {
+                       page += offset / PAGE_SIZE;
+                       offset %= PAGE_SIZE;
+                       n = min_t(size_t, n, PAGE_SIZE - offset);
+               }
+
+               p = kmap_atomic(page) + offset;
+               iterate_and_advance(i, n, base, len, off,
+                       copyin(p + off, base, len),
+                       memcpy_from_iter(i, p + off, base, len)
+               )
+               kunmap_atomic(p);
+               copied += n;
+               offset += n;
+       } while (PageHighMem(page) && copied != bytes && n > 0);
+
+       return copied;
 }
 EXPORT_SYMBOL(copy_page_from_iter_atomic);
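
The loop above exists because a highmem range spanning several pages can only be mapped one page at a time, so the copy advances page by page, remapping as the offset crosses a boundary. A userspace sketch of that loop shape, with PAGE_SIZE shrunk so the chunking is visible:

    #include <stdio.h>
    #include <string.h>

    #define PAGE_SIZE 8

    int main(void)
    {
            char pages[4][PAGE_SIZE];       /* stand-in for a compound page */
            const char *src = "copy that crosses pages";
            size_t bytes = strlen(src) + 1, copied = 0, offset = 3;

            while (copied != bytes) {
                    size_t page = offset / PAGE_SIZE; /* which page to "map" */
                    size_t off  = offset % PAGE_SIZE;
                    size_t n    = bytes - copied;

                    if (n > PAGE_SIZE - off)          /* clamp to this page */
                            n = PAGE_SIZE - off;

                    memcpy(&pages[page][off], src + copied, n);
                    printf("page %zu: copied %zu bytes\n", page, n);

                    copied += n;
                    offset += n;
            }
            return 0;
    }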
 
@@ -1349,7 +1362,7 @@ uaccess_end:
        return ret;
 }
 
-static int copy_iovec_from_user(struct iovec *iov,
+static __noclone int copy_iovec_from_user(struct iovec *iov,
                const struct iovec __user *uiov, unsigned long nr_segs)
 {
        int ret = -EFAULT;
index d98d43f..db60241 100644 (file)
@@ -2,7 +2,8 @@
  * Copyright 2006, Red Hat, Inc., Dave Jones
  * Released under the General Public License (GPL).
  *
- * This file contains the linked list validation for DEBUG_LIST.
+ * This file contains the linked list validation and error reporting for
+ * LIST_HARDENED and DEBUG_LIST.
  */
 
 #include <linux/export.h>
@@ -17,8 +18,9 @@
  * attempt).
  */
 
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
-                     struct list_head *next)
+__list_valid_slowpath
+bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev,
+                               struct list_head *next)
 {
        if (CHECK_DATA_CORRUPTION(prev == NULL,
                        "list_add corruption. prev is NULL.\n") ||
@@ -37,9 +39,10 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev,
 
        return true;
 }
-EXPORT_SYMBOL(__list_add_valid);
+EXPORT_SYMBOL(__list_add_valid_or_report);
 
-bool __list_del_entry_valid(struct list_head *entry)
+__list_valid_slowpath
+bool __list_del_entry_valid_or_report(struct list_head *entry)
 {
        struct list_head *prev, *next;
 
@@ -65,6 +68,5 @@ bool __list_del_entry_valid(struct list_head *entry)
                return false;
 
        return true;
-
 }
-EXPORT_SYMBOL(__list_del_entry_valid);
+EXPORT_SYMBOL(__list_del_entry_valid_or_report);
index 8d24279..6f6a5fc 100644 (file)
@@ -2506,94 +2506,29 @@ static void fs_reclaim_tests(void)
        pr_cont("\n");
 }
 
-#define __guard(cleanup) __maybe_unused __attribute__((__cleanup__(cleanup)))
+/* Defines guard classes to create contexts */
+DEFINE_LOCK_GUARD_0(HARDIRQ, HARDIRQ_ENTER(), HARDIRQ_EXIT())
+DEFINE_LOCK_GUARD_0(NOTTHREADED_HARDIRQ,
+       do {
+               local_irq_disable();
+               __irq_enter();
+               WARN_ON(!in_irq());
+       } while (0), HARDIRQ_EXIT())
+DEFINE_LOCK_GUARD_0(SOFTIRQ, SOFTIRQ_ENTER(), SOFTIRQ_EXIT())
+
+/* Define RCU guards, should go away when RCU has its own guard definitions */
+DEFINE_LOCK_GUARD_0(RCU, rcu_read_lock(), rcu_read_unlock())
+DEFINE_LOCK_GUARD_0(RCU_BH, rcu_read_lock_bh(), rcu_read_unlock_bh())
+DEFINE_LOCK_GUARD_0(RCU_SCHED, rcu_read_lock_sched(), rcu_read_unlock_sched())
 
-static void hardirq_exit(int *_)
-{
-       HARDIRQ_EXIT();
-}
-
-#define HARDIRQ_CONTEXT(name, ...)                                     \
-       int hardirq_guard_##name __guard(hardirq_exit);                 \
-       HARDIRQ_ENTER();
-
-#define NOTTHREADED_HARDIRQ_CONTEXT(name, ...)                         \
-       int notthreaded_hardirq_guard_##name __guard(hardirq_exit);     \
-       local_irq_disable();                                            \
-       __irq_enter();                                                  \
-       WARN_ON(!in_irq());
-
-static void softirq_exit(int *_)
-{
-       SOFTIRQ_EXIT();
-}
-
-#define SOFTIRQ_CONTEXT(name, ...)                             \
-       int softirq_guard_##name __guard(softirq_exit);         \
-       SOFTIRQ_ENTER();
-
-static void rcu_exit(int *_)
-{
-       rcu_read_unlock();
-}
-
-#define RCU_CONTEXT(name, ...)                                 \
-       int rcu_guard_##name __guard(rcu_exit);                 \
-       rcu_read_lock();
-
-static void rcu_bh_exit(int *_)
-{
-       rcu_read_unlock_bh();
-}
-
-#define RCU_BH_CONTEXT(name, ...)                              \
-       int rcu_bh_guard_##name __guard(rcu_bh_exit);           \
-       rcu_read_lock_bh();
-
-static void rcu_sched_exit(int *_)
-{
-       rcu_read_unlock_sched();
-}
-
-#define RCU_SCHED_CONTEXT(name, ...)                           \
-       int rcu_sched_guard_##name __guard(rcu_sched_exit);     \
-       rcu_read_lock_sched();
-
-static void raw_spinlock_exit(raw_spinlock_t **lock)
-{
-       raw_spin_unlock(*lock);
-}
-
-#define RAW_SPINLOCK_CONTEXT(name, lock)                                               \
-       raw_spinlock_t *raw_spinlock_guard_##name __guard(raw_spinlock_exit) = &(lock); \
-       raw_spin_lock(&(lock));
-
-static void spinlock_exit(spinlock_t **lock)
-{
-       spin_unlock(*lock);
-}
-
-#define SPINLOCK_CONTEXT(name, lock)                                           \
-       spinlock_t *spinlock_guard_##name __guard(spinlock_exit) = &(lock);     \
-       spin_lock(&(lock));
-
-static void mutex_exit(struct mutex **lock)
-{
-       mutex_unlock(*lock);
-}
-
-#define MUTEX_CONTEXT(name, lock)                                      \
-       struct mutex *mutex_guard_##name __guard(mutex_exit) = &(lock); \
-       mutex_lock(&(lock));
 
 #define GENERATE_2_CONTEXT_TESTCASE(outer, outer_lock, inner, inner_lock)      \
                                                                                \
 static void __maybe_unused inner##_in_##outer(void)                            \
 {                                                                              \
-       outer##_CONTEXT(_, outer_lock);                                         \
-       {                                                                       \
-               inner##_CONTEXT(_, inner_lock);                                 \
-       }                                                                       \
+       /* Relies on the reversed clean-up ordering: inner first */
+       guard(outer)(outer_lock);                                               \
+       guard(inner)(inner_lock);                                               \
 }
 
 /*
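
DEFINE_LOCK_GUARD_0() and guard() build on the compiler's cleanup attribute, which the removed __guard() macro used directly: a guard variable's cleanup handler runs when it leaves scope, in reverse declaration order, so the inner context unwinds before the outer one, exactly what the comment in the hunk relies on. A userspace sketch of that mechanism (a GCC/Clang extension; the names are illustrative):

    #include <stdio.h>

    static void leave(const char **what)
    {
            printf("exit  %s\n", *what);
    }

    /* Declares a scope guard and announces entry into the context. */
    #define GUARD(name) \
            const char *guard_##name __attribute__((cleanup(leave))) = #name; \
            printf("enter %s\n", guard_##name)

    static void inner_in_outer(void)
    {
            GUARD(outer);
            GUARD(inner);
            /* cleanup order on return: inner first, then outer */
    }

    int main(void)
    {
            inner_in_outer();
            return 0;
    }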
@@ -2632,21 +2567,21 @@ GENERATE_2_CONTEXT_TESTCASE(SOFTIRQ, , inner, inner_lock)                       \
 GENERATE_2_CONTEXT_TESTCASE(RCU, , inner, inner_lock)                          \
 GENERATE_2_CONTEXT_TESTCASE(RCU_BH, , inner, inner_lock)                       \
 GENERATE_2_CONTEXT_TESTCASE(RCU_SCHED, , inner, inner_lock)                    \
-GENERATE_2_CONTEXT_TESTCASE(RAW_SPINLOCK, raw_lock_A, inner, inner_lock)       \
-GENERATE_2_CONTEXT_TESTCASE(SPINLOCK, lock_A, inner, inner_lock)               \
-GENERATE_2_CONTEXT_TESTCASE(MUTEX, mutex_A, inner, inner_lock)
+GENERATE_2_CONTEXT_TESTCASE(raw_spinlock, &raw_lock_A, inner, inner_lock)      \
+GENERATE_2_CONTEXT_TESTCASE(spinlock, &lock_A, inner, inner_lock)              \
+GENERATE_2_CONTEXT_TESTCASE(mutex, &mutex_A, inner, inner_lock)
 
 GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(RCU, )
-GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(RAW_SPINLOCK, raw_lock_B)
-GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(SPINLOCK, lock_B)
-GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(MUTEX, mutex_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(raw_spinlock, &raw_lock_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(spinlock, &lock_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(mutex, &mutex_B)
 
 /* the outer context allows all kinds of preemption */
 #define DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(outer)                   \
        dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);               \
-       dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);        \
-       dotest(SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);            \
-       dotest(MUTEX_in_##outer, SUCCESS, LOCKTYPE_MUTEX);              \
+       dotest(raw_spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);        \
+       dotest(spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);            \
+       dotest(mutex_in_##outer, SUCCESS, LOCKTYPE_MUTEX);              \
 
 /*
  * the outer context only allows the preemption introduced by spinlock_t (which
@@ -2654,16 +2589,16 @@ GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(MUTEX, mutex_B)
  */
 #define DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(outer)           \
        dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);               \
-       dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);        \
-       dotest(SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);            \
-       dotest(MUTEX_in_##outer, FAILURE, LOCKTYPE_MUTEX);              \
+       dotest(raw_spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);        \
+       dotest(spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);            \
+       dotest(mutex_in_##outer, FAILURE, LOCKTYPE_MUTEX);              \
 
 /* the outer doesn't allow any kind of preemption */
 #define DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(outer)                       \
        dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);               \
-       dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);        \
-       dotest(SPINLOCK_in_##outer, FAILURE, LOCKTYPE_SPIN);            \
-       dotest(MUTEX_in_##outer, FAILURE, LOCKTYPE_MUTEX);              \
+       dotest(raw_spinlock_in_##outer, SUCCESS, LOCKTYPE_SPIN);        \
+       dotest(spinlock_in_##outer, FAILURE, LOCKTYPE_SPIN);            \
+       dotest(mutex_in_##outer, FAILURE, LOCKTYPE_MUTEX);              \
 
 static void wait_context_tests(void)
 {
@@ -2697,15 +2632,15 @@ static void wait_context_tests(void)
        pr_cont("\n");
 
        print_testname("in RAW_SPINLOCK context");
-       DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(RAW_SPINLOCK);
+       DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(raw_spinlock);
        pr_cont("\n");
 
        print_testname("in SPINLOCK context");
-       DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(SPINLOCK);
+       DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(spinlock);
        pr_cont("\n");
 
        print_testname("in MUTEX context");
-       DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(MUTEX);
+       DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(mutex);
        pr_cont("\n");
 }
 
index bfffbb7..f723024 100644 (file)
@@ -3692,7 +3692,8 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
        mas->offset = slot;
        pivots[slot] = mas->last;
        if (mas->last != ULONG_MAX)
-               slot++;
+               pivots[++slot] = ULONG_MAX;
+
        mas->depth = 1;
        mas_set_height(mas);
        ma_set_meta(node, maple_leaf_64, 0, slot);
@@ -4264,6 +4265,10 @@ static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
  * mas_wr_append: Attempt to append
  * @wr_mas: the maple write state
  *
+ * This is currently unsafe in RCU mode since the end of the node may be
+ * cached by readers while the node contents may be updated, which could
+ * result in inaccurate information.
+ *
  * Return: True if appended, false otherwise
  */
 static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
@@ -4273,6 +4278,9 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
        struct ma_state *mas = wr_mas->mas;
        unsigned char node_pivots = mt_pivots[wr_mas->type];
 
+       if (mt_in_rcu(mas->tree))
+               return false;
+
        if (mas->offset != wr_mas->node_end)
                return false;
 
index 1a31065..976b9bd 100644 (file)
@@ -1136,7 +1136,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 void __rcu **radix_tree_iter_resume(void __rcu **slot,
                                        struct radix_tree_iter *iter)
 {
-       slot++;
        iter->index = __radix_tree_iter_add(iter, 1);
        iter->next_index = iter->index;
        iter->tags = 0;
index eff4e42..d0a5081 100644 (file)
@@ -550,7 +550,7 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
 
 static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
 {
-       int i, wake_index;
+       int i, wake_index, woken;
 
        if (!atomic_read(&sbq->ws_active))
                return;
@@ -567,13 +567,12 @@ static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
                 */
                wake_index = sbq_index_inc(wake_index);
 
-               /*
-                * It is sufficient to wake up at least one waiter to
-                * guarantee forward progress.
-                */
-               if (waitqueue_active(&ws->wait) &&
-                   wake_up_nr(&ws->wait, nr))
-                       break;
+               if (waitqueue_active(&ws->wait)) {
+                       woken = wake_up_nr(&ws->wait, nr);
+                       if (woken == nr)
+                               break;
+                       nr -= woken;
+               }
        }
 
        if (wake_index != atomic_read(&sbq->wake_index))
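
With this change the loop no longer stops at the first wait queue that had any waiters: it keeps a running count of how many tasks were actually woken and continues scanning until the full budget nr is spent. A hedged userspace model of that accumulate-until-satisfied scan (the queue array and wake function here are invented for illustration):

    #include <stdio.h>

    #define NR_QUEUES 4

    static int waiters[NR_QUEUES] = { 0, 1, 0, 3 };

    /* Wake at most nr tasks on queue q; return how many were woken. */
    static int fake_wake_up_nr(int q, int nr)
    {
            int woken = waiters[q] < nr ? waiters[q] : nr;

            waiters[q] -= woken;
            return woken;
    }

    static void wake_up_total(int nr)
    {
            for (int q = 0; q < NR_QUEUES && nr > 0; q++) {
                    /*
                     * A partially filled queue must not end the scan:
                     * subtract what was delivered and keep going.
                     */
                    nr -= fake_wake_up_nr(q, nr);
            }
    }

    int main(void)
    {
            wake_up_total(3);       /* wakes 1 from queue 1, 2 from queue 3 */
            for (int q = 0; q < NR_QUEUES; q++)
                    printf("queue %d: %d left\n", q, waiters[q]);
            return 0;
    }
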
index e86231a..c65566b 100644 (file)
@@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
 
 failed:
        while (sgtable->nents > sgtable->orig_nents)
-               put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+               unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
        return res;
 }
 
index 187f5b2..f2ea9f3 100644 (file)
@@ -1161,6 +1161,10 @@ static void __init test_bitmap_print_buf(void)
        }
 }
 
+/*
+ * FIXME: Clang breaks compile-time evaluations when KASAN and GCOV are enabled.
+ * To work around it, GCOV is force-disabled in the Makefile for this configuration.
+ */
 static void __init test_bitmap_const_eval(void)
 {
        DECLARE_BITMAP(bitmap, BITS_PER_LONG);
@@ -1186,11 +1190,7 @@ static void __init test_bitmap_const_eval(void)
         * the compiler is fixed.
         */
        bitmap_clear(bitmap, 0, BITS_PER_LONG);
-#if defined(__s390__) && defined(__clang__)
-       if (!const_test_bit(7, bitmap))
-#else
        if (!test_bit(7, bitmap))
-#endif
                bitmap_set(bitmap, 5, 2);
 
        /* Equals to `unsigned long bitopvar = BIT(20)` */
index 9939be3..8d4c92c 100644 (file)
@@ -1898,13 +1898,16 @@ static noinline void __init next_prev_test(struct maple_tree *mt)
                                                   725};
        static const unsigned long level2_32[] = { 1747, 2000, 1750, 1755,
                                                   1760, 1765};
+       unsigned long last_index;
 
        if (MAPLE_32BIT) {
                nr_entries = 500;
                level2 = level2_32;
+               last_index = 0x138e;
        } else {
                nr_entries = 200;
                level2 = level2_64;
+               last_index = 0x7d6;
        }
 
        for (i = 0; i <= nr_entries; i++)
@@ -2011,7 +2014,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt)
 
        val = mas_next(&mas, ULONG_MAX);
        MT_BUG_ON(mt, val != NULL);
-       MT_BUG_ON(mt, mas.index != 0x7d6);
+       MT_BUG_ON(mt, mas.index != last_index);
        MT_BUG_ON(mt, mas.last != ULONG_MAX);
 
        val = mas_prev(&mas, 0);
index 678530a..d4ee209 100644 (file)
@@ -51,7 +51,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
                           readahead.o swap.o truncate.o vmscan.o shmem.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o percpu.o slab_common.o \
-                          compaction.o show_mem.o\
+                          compaction.o show_mem.o shmem_quota.o\
                           interval_tree.o list_lru.o workingset.o \
                           debug.o gup.o mmap_lock.o $(mmu-y)
 
index dbc9f86..eacca27 100644 (file)
@@ -912,11 +912,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                /*
                 * Check if the pageblock has already been marked skipped.
-                * Only the aligned PFN is checked as the caller isolates
+                * Only the first PFN is checked as the caller isolates
                 * COMPACT_CLUSTER_MAX at a time so the second call must
                 * not falsely conclude that the block should be skipped.
                 */
-               if (!valid_page && pageblock_aligned(low_pfn)) {
+               if (!valid_page && (pageblock_aligned(low_pfn) ||
+                                   low_pfn == cc->zone->zone_start_pfn)) {
                        if (!isolation_suitable(cc, page)) {
                                low_pfn = end_pfn;
                                folio = NULL;
@@ -2002,7 +2003,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
                 * before making it "skip" so other compaction instances do
                 * not scan the same block.
                 */
-               if (pageblock_aligned(low_pfn) &&
+               if ((pageblock_aligned(low_pfn) ||
+                    low_pfn == cc->zone->zone_start_pfn) &&
                    !fast_find_block && !isolation_suitable(cc, page))
                        continue;
 
index c112101..bb07721 100644 (file)
@@ -320,25 +320,25 @@ static void damon_test_update_monitoring_result(struct kunit *test)
 
 static void damon_test_set_attrs(struct kunit *test)
 {
-       struct damon_ctx ctx;
+       struct damon_ctx *c = damon_new_ctx();
        struct damon_attrs valid_attrs = {
                .min_nr_regions = 10, .max_nr_regions = 1000,
                .sample_interval = 5000, .aggr_interval = 100000,};
        struct damon_attrs invalid_attrs;
 
-       KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &valid_attrs), 0);
+       KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &valid_attrs), 0);
 
        invalid_attrs = valid_attrs;
        invalid_attrs.min_nr_regions = 1;
-       KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &invalid_attrs), -EINVAL);
+       KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
 
        invalid_attrs = valid_attrs;
        invalid_attrs.max_nr_regions = 9;
-       KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &invalid_attrs), -EINVAL);
+       KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
 
        invalid_attrs = valid_attrs;
        invalid_attrs.aggr_interval = 4999;
-       KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &invalid_attrs), -EINVAL);
+       KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
 }
 
 static struct kunit_case damon_test_cases[] = {
index 91cff7f..eb95809 100644 (file)
@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
                return NULL;
        filter->type = type;
        filter->matching = matching;
+       INIT_LIST_HEAD(&filter->list);
        return filter;
 }
 
index 2fcc973..e0e59d4 100644 (file)
@@ -386,6 +386,7 @@ out:
 static const struct mm_walk_ops damon_mkold_ops = {
        .pmd_entry = damon_mkold_pmd_entry,
        .hugetlb_entry = damon_mkold_hugetlb_entry,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -525,6 +526,7 @@ out:
 static const struct mm_walk_ops damon_young_ops = {
        .pmd_entry = damon_young_pmd_entry,
        .hugetlb_entry = damon_young_hugetlb_entry,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
index 9e44a49..baafbf3 100644 (file)
@@ -1855,30 +1855,15 @@ out:
  *
  * Looks up the page cache entry at @mapping & @index.
  *
- * @fgp_flags can be zero or more of these flags:
- *
- * * %FGP_ACCESSED - The folio will be marked accessed.
- * * %FGP_LOCK - The folio is returned locked.
- * * %FGP_CREAT - If no page is present then a new page is allocated using
- *   @gfp and added to the page cache and the VM's LRU list.
- *   The page is returned locked and with an increased refcount.
- * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
- *   page is already in cache.  If the page was allocated, unlock it before
- *   returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written to by the caller.
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
- * * %FGP_NOWAIT - Don't get blocked by page lock.
- * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
- *
  * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
  * if the %GFP flags specified for %FGP_CREAT are atomic.
  *
- * If there is a page cache page, it is returned with an increased refcount.
+ * If this function returns a folio, it is returned with an increased refcount.
  *
  * Return: The found folio or an ERR_PTR() otherwise.
  */
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-               int fgp_flags, gfp_t gfp)
+               fgf_t fgp_flags, gfp_t gfp)
 {
        struct folio *folio;
 
@@ -1920,7 +1905,9 @@ repeat:
                folio_wait_stable(folio);
 no_page:
        if (!folio && (fgp_flags & FGP_CREAT)) {
+               unsigned int order = FGF_GET_ORDER(fgp_flags);
                int err;
+
                if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
                        gfp |= __GFP_WRITE;
                if (fgp_flags & FGP_NOFS)
@@ -1929,26 +1916,44 @@ no_page:
                        gfp &= ~GFP_KERNEL;
                        gfp |= GFP_NOWAIT | __GFP_NOWARN;
                }
-
-               folio = filemap_alloc_folio(gfp, 0);
-               if (!folio)
-                       return ERR_PTR(-ENOMEM);
-
                if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
                        fgp_flags |= FGP_LOCK;
 
-               /* Init accessed so avoid atomic mark_page_accessed later */
-               if (fgp_flags & FGP_ACCESSED)
-                       __folio_set_referenced(folio);
+               if (!mapping_large_folio_support(mapping))
+                       order = 0;
+               if (order > MAX_PAGECACHE_ORDER)
+                       order = MAX_PAGECACHE_ORDER;
+               /* If we're not aligned, allocate a smaller folio */
+               if (index & ((1UL << order) - 1))
+                       order = __ffs(index);
 
-               err = filemap_add_folio(mapping, folio, index, gfp);
-               if (unlikely(err)) {
+               do {
+                       gfp_t alloc_gfp = gfp;
+
+                       err = -ENOMEM;
+                       if (order == 1)
+                               order = 0;
+                       if (order > 0)
+                               alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
+                       folio = filemap_alloc_folio(alloc_gfp, order);
+                       if (!folio)
+                               continue;
+
+                       /* Init accessed to avoid atomic mark_page_accessed later */
+                       if (fgp_flags & FGP_ACCESSED)
+                               __folio_set_referenced(folio);
+
+                       err = filemap_add_folio(mapping, folio, index, gfp);
+                       if (!err)
+                               break;
                        folio_put(folio);
                        folio = NULL;
-                       if (err == -EEXIST)
-                               goto repeat;
-               }
+               } while (order-- > 0);
 
+               if (err == -EEXIST)
+                       goto repeat;
+               if (err)
+                       return ERR_PTR(err);
                /*
                 * filemap_add_folio locks the page, and for mmap
                 * we expect an unlocked page.
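
The rewritten allocation path tries the large order implied by FGF_GET_ORDER() first and steps down on failure, skipping order 1 (the page cache has no use for two-page folios) and reporting an error only once order 0 has failed too. A simplified userspace model of that fallback loop, with malloc() standing in for filemap_alloc_folio():

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_SIZE 4096UL

    /*
     * Try 2^order pages first, then progressively smaller orders.
     * Returns NULL only if even the order-0 attempt fails.
     */
    static void *alloc_folio_fallback(unsigned int order)
    {
            void *p;

            do {
                    if (order == 1)         /* no order-1 folios */
                            order = 0;
                    p = malloc(PAGE_SIZE << order);
                    if (p) {
                            printf("got order %u\n", order);
                            return p;
                    }
            } while (order-- > 0);

            return NULL;
    }

    int main(void)
    {
            void *p = alloc_folio_fallback(4);

            free(p);
            return p ? 0 : 1;
    }
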
index c6f056c..10c3247 100644 (file)
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(add_to_page_cache_lru);
 
 noinline
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-               int fgp_flags, gfp_t gfp)
+               fgf_t fgp_flags, gfp_t gfp)
 {
        struct folio *folio;
 
index 76d222c..6e2f9e9 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
        pte = ptep_get(ptep);
        if (!pte_present(pte))
                goto no_page;
-       if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+       if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
                goto no_page;
 
        page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
        if (likely(!pmd_trans_huge(pmdval)))
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
-       if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+       if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
                return no_page_table(vma, flags);
 
        ptl = pmd_lock(mm, pmd);
@@ -851,6 +851,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
        if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
                return NULL;
 
+       /*
+        * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+        * to fail on PROT_NONE-mapped pages.
+        */
        page = follow_page_mask(vma, address, foll_flags, &ctx);
        if (ctx.pgmap)
                put_dev_pagemap(ctx.pgmap);
@@ -2227,6 +2231,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
                gup_flags |= FOLL_UNLOCKABLE;
        }
 
+       /*
+        * For now, always trigger NUMA hinting faults. Some GUP users like
+        * KVM require hinting faults to be honored, as the calling context of
+        * GUP is functionally similar to a memory reference from task context.
+        */
+       gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
        /* FOLL_GET and FOLL_PIN are mutually exclusive. */
        if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
                         (FOLL_PIN | FOLL_GET)))
@@ -2551,7 +2562,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
                struct page *page;
                struct folio *folio;
 
-               if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+               /*
+                * Always fall back to ordinary GUP on PROT_NONE-mapped pages:
+                * pte_access_permitted() should reject these pages either way;
+                * otherwise, GUP-fast might succeed in cases where ordinary
+                * GUP would fail due to VMA access permissions.
+                */
+               if (pte_protnone(pte))
                        goto pte_unmap;
 
                if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2970,8 +2988,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 
                if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
                             pmd_devmap(pmd))) {
-                       if (pmd_protnone(pmd) &&
-                           !gup_can_follow_protnone(flags))
+                       /* See gup_pte_range() */
+                       if (pmd_protnone(pmd))
                                return 0;
 
                        if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3169,7 @@ static int internal_get_user_pages_fast(unsigned long start,
        if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
                                       FOLL_FORCE | FOLL_PIN | FOLL_GET |
                                       FOLL_FAST_ONLY | FOLL_NOFAULT |
-                                      FOLL_PCI_P2PDMA)))
+                                      FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
                return -EINVAL;
 
        if (gup_flags & FOLL_PIN)
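
The net effect of these hunks: GUP-fast now always bails out on pte_protnone() entries (falling back to slow GUP), and slow GUP honors NUMA hinting faults only when FOLL_HONOR_NUMA_FAULT is set and the VMA is actually accessible, since a protnone entry inside a PROT_NONE VMA is real protection rather than a NUMA hint. A hedged sketch of the decision the vma-aware gup_can_follow_protnone() encodes, with simplified stand-in types and an illustrative flag value:

    #include <assert.h>
    #include <stdbool.h>

    #define FOLL_HONOR_NUMA_FAULT (1u << 0)     /* illustrative bit value */

    struct vma { bool accessible; };            /* stand-in for vm_area_struct */

    /* May a protnone entry be followed without faulting? */
    static bool can_follow_protnone(const struct vma *vma, unsigned int flags)
    {
            /* Caller opted out of NUMA hinting: follow freely. */
            if (!(flags & FOLL_HONOR_NUMA_FAULT))
                    return true;
            /*
             * In an inaccessible (PROT_NONE) VMA the entry cannot be a
             * NUMA hint; in an accessible VMA it is a hint that must
             * trigger a fault instead of being followed.
             */
            return !vma->accessible;
    }

    int main(void)
    {
            struct vma prot_none = { .accessible = false };
            struct vma normal = { .accessible = true };

            assert(can_follow_protnone(&normal, 0));
            assert(can_follow_protnone(&prot_none, FOLL_HONOR_NUMA_FAULT));
            assert(!can_follow_protnone(&normal, FOLL_HONOR_NUMA_FAULT));
            return 0;
    }
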
index 855e25e..277ddca 100644 (file)
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
        .pte_hole       = hmm_vma_walk_hole,
        .hugetlb_entry  = hmm_vma_walk_hugetlb_entry,
        .test_walk      = hmm_vma_walk_test,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 /**
index eb36783..e4f0266 100644 (file)
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
        if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
                return ERR_PTR(-EFAULT);
 
-       /* Full NUMA hinting faults to serialise migration in fault paths */
-       if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+       if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
                return NULL;
 
        if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
@@ -1613,7 +1612,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
         * If other processes are mapping this folio, we couldn't discard
         * the folio unless they all do MADV_FREE so let's skip the folio.
         */
-       if (folio_mapcount(folio) != 1)
+       if (folio_estimated_sharers(folio) != 1)
                goto out;
 
        if (!folio_trylock(folio))
@@ -2521,7 +2520,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        struct address_space *swap_cache = NULL;
        unsigned long offset = 0;
        unsigned int nr = thp_nr_pages(head);
-       int i;
+       int i, nr_dropped = 0;
 
        /* complete memcg works before add pages to LRU */
        split_page_memcg(head, nr);
@@ -2546,7 +2545,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                        struct folio *tail = page_folio(head + i);
 
                        if (shmem_mapping(head->mapping))
-                               shmem_uncharge(head->mapping->host, 1);
+                               nr_dropped++;
                        else if (folio_test_clear_dirty(tail))
                                folio_account_cleaned(tail,
                                        inode_to_wb(folio->mapping->host));
@@ -2583,6 +2582,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        }
        local_irq_enable();
 
+       if (nr_dropped)
+               shmem_uncharge(head->mapping->host, nr_dropped);
        remap_page(folio, nr);
 
        if (PageSwapCache(head)) {
index 64a3239..6da626b 100644 (file)
@@ -1579,9 +1579,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
                                                unsigned int order) { }
 #endif
 
+static inline void __clear_hugetlb_destructor(struct hstate *h,
+                                               struct folio *folio)
+{
+       lockdep_assert_held(&hugetlb_lock);
+
+       /*
+        * Very subtle
+        *
+        * For non-gigantic pages set the destructor to the normal compound
+        * page dtor.  This is needed in case someone takes an additional
+        * temporary ref to the page, and freeing is delayed until they drop
+        * their reference.
+        *
+        * For gigantic pages set the destructor to the null dtor.  This
+        * destructor will never be called.  Before freeing the gigantic
+        * page, destroy_compound_gigantic_folio() will turn the folio into
+        * a simple group of pages.  After this the destructor does not
+        * apply.
+        */
+       if (hstate_is_gigantic(h))
+               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
+       else
+               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
+}
+
 /*
- * Remove hugetlb folio from lists, and update dtor so that the folio appears
- * as just a compound page.
+ * Remove hugetlb folio from lists.
+ * If vmemmap exists for the folio, update dtor so that the folio appears
+ * as just a compound page.  Otherwise, wait until after allocating vmemmap
+ * to update dtor.
  *
  * A reference is held on the folio, except in the case of demote.
  *
@@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
        }
 
        /*
-        * Very subtle
-        *
-        * For non-gigantic pages set the destructor to the normal compound
-        * page dtor.  This is needed in case someone takes an additional
-        * temporary ref to the page, and freeing is delayed until they drop
-        * their reference.
-        *
-        * For gigantic pages set the destructor to the null dtor.  This
-        * destructor will never be called.  Before freeing the gigantic
-        * page destroy_compound_gigantic_folio will turn the folio into a
-        * simple group of pages.  After this the destructor does not
-        * apply.
-        *
-        * This handles the case where more than one ref is held when and
-        * after update_and_free_hugetlb_folio is called.
-        *
-        * In the case of demote we do not ref count the page as it will soon
-        * be turned into a page of smaller size.
+        * We can only clear the hugetlb destructor after allocating vmemmap
+        * pages.  Otherwise, someone (memory error handling) may try to write
+        * to tail struct pages.
+        */
+       if (!folio_test_hugetlb_vmemmap_optimized(folio))
+               __clear_hugetlb_destructor(h, folio);
+
+       /*
+        * In the case of demote we do not ref count the page as it will soon
+        * be turned into a page of smaller size.
         */
        if (!demote)
                folio_ref_unfreeze(folio, 1);
-       if (hstate_is_gigantic(h))
-               folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
-       else
-               folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[nid]--;
@@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 {
        int i;
        struct page *subpage;
+       bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
 
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
@@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
        if (unlikely(folio_test_hwpoison(folio)))
                folio_clear_hugetlb_hwpoison(folio);
 
+       /*
+        * If vmemmap pages were allocated above, then we need to clear the
+        * hugetlb destructor under the hugetlb lock.
+        */
+       if (clear_dtor) {
+               spin_lock_irq(&hugetlb_lock);
+               __clear_hugetlb_destructor(h, folio);
+               spin_unlock_irq(&hugetlb_lock);
+       }
+
        for (i = 0; i < pages_per_huge_page(h); i++) {
                subpage = folio_page(folio, i);
                subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
index a7d9e98..8ed127c 100644 (file)
@@ -924,6 +924,13 @@ int migrate_device_coherent_page(struct page *page);
 struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
 int __must_check try_grab_page(struct page *page, unsigned int flags);
 
+/*
+ * mm/huge_memory.c
+ */
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+                                  unsigned long addr, pmd_t *pmd,
+                                  unsigned int flags);
+
 enum {
        /* mark page accessed */
        FOLL_TOUCH = 1 << 16,
@@ -998,6 +1005,16 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
                smp_rmb();
 
        /*
+        * During GUP-fast we might not get called on the head page for a
+        * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+        * not work with the abstracted hugetlb PTEs that always point at the
+        * head page. For hugetlb, PageAnonExclusive only applies on the head
+        * page (as it cannot be partially COW-shared), so lookup the head page.
+        */
+       if (unlikely(!PageHead(page) && PageHuge(page)))
+               page = compound_head(page);
+
+       /*
         * Note that PageKsm() pages cannot be exclusive, and consequently,
         * cannot get pinned.
         */
index 78c8d5d..47d1d32 100644 (file)
@@ -1955,10 +1955,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
                                                goto xa_locked;
                                        }
                                }
-                               if (!shmem_charge(mapping->host, 1)) {
-                                       result = SCAN_FAIL;
-                                       goto xa_locked;
-                               }
                                nr_none++;
                                continue;
                        }
@@ -2145,8 +2141,13 @@ xa_unlocked:
         */
        try_to_unmap_flush();
 
-       if (result != SCAN_SUCCEED)
+       if (result == SCAN_SUCCEED && nr_none &&
+           !shmem_charge(mapping->host, nr_none))
+               result = SCAN_FAIL;
+       if (result != SCAN_SUCCEED) {
+               nr_none = 0;
                goto rollback;
+       }
 
        /*
         * The old pages are locked, so they won't change anymore.
@@ -2283,8 +2284,8 @@ rollback:
        if (nr_none) {
                xas_lock_irq(&xas);
                mapping->nrpages -= nr_none;
-               shmem_uncharge(mapping->host, nr_none);
                xas_unlock_irq(&xas);
+               shmem_uncharge(mapping->host, nr_none);
        }
 
        list_for_each_entry_safe(page, tmp, &pagelist, lru) {
index ba26635..d7b5b95 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -455,6 +455,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
 
 static const struct mm_walk_ops break_ksm_ops = {
        .pmd_entry = break_ksm_pmd_entry,
+       .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+       .pmd_entry = break_ksm_pmd_entry,
+       .walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -470,16 +476,17 @@ static const struct mm_walk_ops break_ksm_ops = {
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
 {
        vm_fault_t ret = 0;
+       const struct mm_walk_ops *ops = lock_vma ?
+                               &break_ksm_lock_vma_ops : &break_ksm_ops;
 
        do {
                int ksm_page;
 
                cond_resched();
-               ksm_page = walk_page_range_vma(vma, addr, addr + 1,
-                                              &break_ksm_ops, NULL);
+               ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
                if (WARN_ON_ONCE(ksm_page < 0))
                        return ksm_page;
                if (!ksm_page)
@@ -565,7 +572,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
        mmap_read_lock(mm);
        vma = find_mergeable_vma(mm, addr);
        if (vma)
-               break_ksm(vma, addr);
+               break_ksm(vma, addr, false);
        mmap_read_unlock(mm);
 }
 
@@ -871,7 +878,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
  */
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
-                            unsigned long start, unsigned long end)
+                            unsigned long start, unsigned long end, bool lock_vma)
 {
        unsigned long addr;
        int err = 0;
@@ -882,7 +889,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
                if (signal_pending(current))
                        err = -ERESTARTSYS;
                else
-                       err = break_ksm(vma, addr);
+                       err = break_ksm(vma, addr, lock_vma);
        }
        return err;
 }
@@ -1029,7 +1036,7 @@ static int unmerge_and_remove_all_rmap_items(void)
                        if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
                                continue;
                        err = unmerge_ksm_pages(vma,
-                                               vma->vm_start, vma->vm_end);
+                                               vma->vm_start, vma->vm_end, false);
                        if (err)
                                goto error;
                }
@@ -2530,7 +2537,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
                return 0;
 
        if (vma->anon_vma) {
-               err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+               err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
                if (err)
                        return err;
        }
@@ -2668,7 +2675,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
                        return 0;               /* just ignore the advice */
 
                if (vma->anon_vma) {
-                       err = unmerge_ksm_pages(vma, start, end);
+                       err = unmerge_ksm_pages(vma, start, end, true);
                        if (err)
                                return err;
                }
@@ -2784,6 +2791,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
                        anon_vma->root == vma->anon_vma->root) {
                return page;            /* still no need to copy it */
        }
+       if (PageHWPoison(page))
+               return ERR_PTR(-EHWPOISON);
        if (!PageUptodate(page))
                return page;            /* let do_swap_page report the error */
 
index 886f060..ec30f48 100644 (file)
@@ -233,6 +233,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops swapin_walk_ops = {
        .pmd_entry              = swapin_walk_pmd_entry,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -383,7 +384,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
                folio = pfn_folio(pmd_pfn(orig_pmd));
 
                /* Do not interfere with other mappings of this folio */
-               if (folio_mapcount(folio) != 1)
+               if (folio_estimated_sharers(folio) != 1)
                        goto huge_unlock;
 
                if (pageout_anon_only_filter && !folio_test_anon(folio))
@@ -457,7 +458,7 @@ regular_folio:
                if (folio_test_large(folio)) {
                        int err;
 
-                       if (folio_mapcount(folio) != 1)
+                       if (folio_estimated_sharers(folio) != 1)
                                break;
                        if (pageout_anon_only_filter && !folio_test_anon(folio))
                                break;
@@ -534,6 +535,7 @@ regular_folio:
 
 static const struct mm_walk_ops cold_walk_ops = {
        .pmd_entry = madvise_cold_or_pageout_pte_range,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -678,7 +680,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                if (folio_test_large(folio)) {
                        int err;
 
-                       if (folio_mapcount(folio) != 1)
+                       if (folio_estimated_sharers(folio) != 1)
                                break;
                        if (!folio_trylock(folio))
                                break;
@@ -757,6 +759,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops madvise_free_walk_ops = {
        .pmd_entry              = madvise_free_pte_range,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,
index e8ca4bd..315fd5f 100644 (file)
@@ -6024,6 +6024,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
        .pmd_entry      = mem_cgroup_count_precharge_pte_range,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6304,7 @@ put:                      /* get_mctgt_type() gets & locks the page */
 
 static const struct mm_walk_ops charge_walk_ops = {
        .pmd_entry      = mem_cgroup_move_charge_pte_range,
+       .walk_lock      = PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)
index e245191..fe121fd 100644 (file)
@@ -831,6 +831,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 static const struct mm_walk_ops hwp_walk_ops = {
        .pmd_entry = hwpoison_pte_range,
        .hugetlb_entry = hwpoison_hugetlb_range,
+       .walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -2466,7 +2467,7 @@ int unpoison_memory(unsigned long pfn)
 {
        struct folio *folio;
        struct page *p;
-       int ret = -EBUSY;
+       int ret = -EBUSY, ghp;
        unsigned long count = 1;
        bool huge = false;
        static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2487,7 +2488,7 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (!folio_test_hwpoison(folio)) {
+       if (!PageHWPoison(p)) {
                unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
                                 pfn, &unpoison_rs);
                goto unlock_mutex;
@@ -2499,6 +2500,13 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
+       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+               goto unlock_mutex;
+
+       /*
+        * Note that folio->_mapcount is overloaded in SLAB, so the simple test
+        * in folio_mapped() has to be done after folio_test_slab() is checked.
+        */
        if (folio_mapped(folio)) {
                unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
                                 pfn, &unpoison_rs);
@@ -2511,32 +2519,28 @@ int unpoison_memory(unsigned long pfn)
                goto unlock_mutex;
        }
 
-       if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
-               goto unlock_mutex;
-
-       ret = get_hwpoison_page(p, MF_UNPOISON);
-       if (!ret) {
+       ghp = get_hwpoison_page(p, MF_UNPOISON);
+       if (!ghp) {
                if (PageHuge(p)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
-                       if (count == 0) {
-                               ret = -EBUSY;
+                       if (count == 0)
                                goto unlock_mutex;
-                       }
                }
                ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
-       } else if (ret < 0) {
-               if (ret == -EHWPOISON) {
+       } else if (ghp < 0) {
+               if (ghp == -EHWPOISON) {
                        ret = put_page_back_buddy(p) ? 0 : -EBUSY;
-               } else
+               } else {
+                       ret = ghp;
                        unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
                                         pfn, &unpoison_rs);
+               }
        } else {
                if (PageHuge(p)) {
                        huge = true;
                        count = folio_free_raw_hwp(folio, false);
                        if (count == 0) {
-                               ret = -EBUSY;
                                folio_put(folio);
                                goto unlock_mutex;
                        }
@@ -2737,10 +2741,13 @@ retry:
        if (ret > 0) {
                ret = soft_offline_in_use_page(page);
        } else if (ret == 0) {
-               if (!page_handle_poison(page, true, false) && try_again) {
-                       try_again = false;
-                       flags &= ~MF_COUNT_INCREASED;
-                       goto retry;
+               if (!page_handle_poison(page, true, false)) {
+                       if (try_again) {
+                               try_again = false;
+                               flags &= ~MF_COUNT_INCREASED;
+                               goto retry;
+                       }
+                       ret = -EBUSY;
                }
        }
 
index 01f39e8..cdc4d4c 100644 (file)
@@ -5257,11 +5257,8 @@ EXPORT_SYMBOL_GPL(handle_mm_fault);
 
 static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
 {
-       /* Even if this succeeds, make it clear we *might* have slept */
-       if (likely(mmap_read_trylock(mm))) {
-               might_sleep();
+       if (likely(mmap_read_trylock(mm)))
                return true;
-       }
 
        if (regs && !user_mode(regs)) {
                unsigned long ip = instruction_pointer(regs);
@@ -5393,27 +5390,28 @@ retry:
        if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
                goto inval;
 
-       /* find_mergeable_anon_vma uses adjacent vmas which are not locked */
-       if (!vma->anon_vma && !vma_is_tcp(vma))
-               goto inval;
-
        if (!vma_start_read(vma))
                goto inval;
 
        /*
+        * find_mergeable_anon_vma uses adjacent vmas which are not locked.
+        * This check must happen after vma_start_read(); otherwise, a
+        * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
+        * from its anon_vma.
+        */
+       if (unlikely(!vma->anon_vma && !vma_is_tcp(vma)))
+               goto inval_end_read;
+
+       /*
         * Due to the possibility of userfault handler dropping mmap_lock, avoid
         * it for now and fall back to page fault handling under mmap_lock.
         */
-       if (userfaultfd_armed(vma)) {
-               vma_end_read(vma);
-               goto inval;
-       }
+       if (userfaultfd_armed(vma))
+               goto inval_end_read;
 
        /* Check since vm_start/vm_end might change before we lock the VMA */
-       if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
-               vma_end_read(vma);
-               goto inval;
-       }
+       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+               goto inval_end_read;
 
        /* Check if the VMA got isolated after we found it */
        if (vma->detached) {
@@ -5425,6 +5423,9 @@ retry:
 
        rcu_read_unlock();
        return vma;
+
+inval_end_read:
+       vma_end_read(vma);
 inval:
        rcu_read_unlock();
        count_vm_vma_lock_event(VMA_LOCK_ABORT);
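
The reordering follows the standard lockless-lookup discipline: take the VMA read lock first, then re-validate everything a concurrent writer could have changed (anon_vma, userfaultfd state, the address range, the detached flag), funnelling every failure through one vma_end_read() exit. A small C11 sketch of that lock-then-revalidate shape, using invented toy types rather than the kernel's:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stddef.h>

    struct obj {
            atomic_bool locked;     /* toy stand-in for the per-VMA lock */
            atomic_bool detached;
            unsigned long start, end;
    };

    static bool try_read_lock(struct obj *o)
    {
            bool expected = false;

            return atomic_compare_exchange_strong(&o->locked, &expected, true);
    }

    static void read_unlock(struct obj *o)
    {
            atomic_store(&o->locked, false);
    }

    /*
     * Lock first, validate second: every field a writer could change is
     * checked only after the lock is held, and every failed check leaves
     * through the same unlock path.
     */
    static struct obj *lookup(struct obj *o, unsigned long addr)
    {
            if (!try_read_lock(o))
                    return NULL;
            if (atomic_load(&o->detached) || addr < o->start || addr >= o->end) {
                    read_unlock(o);
                    return NULL;
            }
            return o;
    }

    int main(void)
    {
            struct obj o = { .start = 0x1000, .end = 0x2000 };

            return lookup(&o, 0x1800) ? 0 : 1;
    }
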
@@ -5701,6 +5702,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
        if (mmap_read_lock_killable(mm))
                return 0;
 
+       /* Untag the address before looking up the VMA */
+       addr = untagged_addr_remote(mm, addr);
+
        /* Avoid triggering the temporary warning in __get_user_pages */
        if (!vma_lookup(mm, addr) && !expand_stack(mm, addr))
                return 0;
index edc2519..ec2eace 100644 (file)
@@ -384,8 +384,10 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
        VMA_ITERATOR(vmi, mm, 0);
 
        mmap_write_lock(mm);
-       for_each_vma(vmi, vma)
+       for_each_vma(vmi, vma) {
+               vma_start_write(vma);
                mpol_rebind_policy(vma->vm_policy, new);
+       }
        mmap_write_unlock(mm);
 }
 
@@ -716,6 +718,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
        .hugetlb_entry          = queue_folios_hugetlb,
        .pmd_entry              = queue_folios_pte_range,
        .test_walk              = queue_pages_test_walk,
+       .walk_lock              = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+       .hugetlb_entry          = queue_folios_hugetlb,
+       .pmd_entry              = queue_folios_pte_range,
+       .test_walk              = queue_pages_test_walk,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 /*
@@ -736,7 +746,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
 static int
 queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                nodemask_t *nodes, unsigned long flags,
-               struct list_head *pagelist)
+               struct list_head *pagelist, bool lock_vma)
 {
        int err;
        struct queue_pages qp = {
@@ -747,8 +757,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                .end = end,
                .first = NULL,
        };
+       const struct mm_walk_ops *ops = lock_vma ?
+                       &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
 
-       err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+       err = walk_page_range(mm, start, end, ops, &qp);
 
        if (!qp.first)
                /* whole range in hole */
@@ -768,6 +780,8 @@ static int vma_replace_policy(struct vm_area_struct *vma,
        struct mempolicy *old;
        struct mempolicy *new;
 
+       vma_assert_write_locked(vma);
+
        pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
                 vma->vm_start, vma->vm_end, vma->vm_pgoff,
                 vma->vm_ops, vma->vm_file,
@@ -1074,7 +1088,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
        vma = find_vma(mm, 0);
        VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
        queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
-                       flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+                       flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
 
        if (!list_empty(&pagelist)) {
                err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1313,8 +1327,12 @@ static long do_mbind(unsigned long start, unsigned long len,
        if (err)
                goto mpol_out;
 
+       /*
+        * Lock the VMAs before scanning for pages to migrate, to ensure we don't
+        * miss a concurrently inserted page.
+        */
        ret = queue_pages_range(mm, start, end, nmask,
-                         flags | MPOL_MF_INVERT, &pagelist);
+                         flags | MPOL_MF_INVERT, &pagelist, true);
 
        if (ret < 0) {
                err = ret;
@@ -1538,6 +1556,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
                        break;
                }
 
+               vma_start_write(vma);
                new->home_node = home_node;
                err = mbind_range(&vmi, vma, &prev, start, end, new);
                mpol_put(new);
index 8365158..d5f4923 100644 (file)
@@ -279,6 +279,7 @@ next:
 static const struct mm_walk_ops migrate_vma_walk_ops = {
        .pmd_entry              = migrate_vma_collect_pmd,
        .pte_hole               = migrate_vma_collect_hole,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 /*
index b7f7a51..dad3622 100644 (file)
@@ -176,6 +176,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
        .pmd_entry              = mincore_pte_range,
        .pte_hole               = mincore_unmapped_range,
        .hugetlb_entry          = mincore_hugetlb,
+       .walk_lock              = PGWALK_RDLOCK,
 };
 
 /*
index d7db945..479e09d 100644 (file)
@@ -371,6 +371,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
 {
        static const struct mm_walk_ops mlock_walk_ops = {
                .pmd_entry = mlock_pte_range,
+               .walk_lock = PGWALK_WRLOCK_VERIFY,
        };
 
        /*
@@ -477,7 +478,6 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
 {
        unsigned long nstart, end, tmp;
        struct vm_area_struct *vma, *prev;
-       int error;
        VMA_ITERATOR(vmi, current->mm, start);
 
        VM_BUG_ON(offset_in_page(start));
@@ -498,6 +498,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
        nstart = start;
        tmp = vma->vm_start;
        for_each_vma_range(vmi, vma, end) {
+               int error;
                vm_flags_t newflags;
 
                if (vma->vm_start != tmp)
@@ -511,14 +512,15 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
                        tmp = end;
                error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
                if (error)
-                       break;
+                       return error;
+               tmp = vma_iter_end(&vmi);
                nstart = tmp;
        }
 
-       if (vma_iter_end(&vmi) < end)
+       if (tmp < end)
                return -ENOMEM;
 
-       return error;
+       return 0;
 }
 
 /*
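
Two fixes land here: a real mlock_fixup() error is now returned immediately instead of being potentially overwritten on a later iteration, and coverage of the requested range is tracked through tmp so that a hole between VMAs is reported as -ENOMEM. A hedged sketch of that walk-and-check-coverage pattern over a plain array (the types and helper are invented for illustration):

    #include <errno.h>
    #include <stddef.h>

    struct range { unsigned long start, end; };

    static int apply_one(const struct range *r)
    {
            (void)r;
            return 0;       /* pretend the per-range operation succeeded */
    }

    /*
     * Each range must begin exactly where the previous one stopped;
     * a gap means part of [start, end) is uncovered.
     */
    static int apply_over_range(const struct range *r, size_t n,
                                unsigned long start, unsigned long end)
    {
            unsigned long next = start;

            for (size_t i = 0; i < n && r[i].start < end; i++) {
                    int err;

                    if (r[i].start != next)
                            return -ENOMEM;         /* hole before this range */
                    err = apply_one(&r[i]);
                    if (err)
                            return err;             /* real error, right away */
                    next = r[i].end < end ? r[i].end : end;
            }
            return next < end ? -ENOMEM : 0;        /* hole at the tail */
    }

    int main(void)
    {
            struct range rs[] = { { 0, 10 }, { 10, 20 } };

            return apply_over_range(rs, 2, 0, 20);
    }
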
index 3eda23c..3937479 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -615,6 +615,7 @@ static inline int dup_anon_vma(struct vm_area_struct *dst,
         * anon pages imported.
         */
        if (src->anon_vma && !dst->anon_vma) {
+               vma_start_write(dst);
                dst->anon_vma = src->anon_vma;
                return anon_vma_clone(dst, src);
        }
index 6f658d4..3aef134 100644 (file)
@@ -568,6 +568,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
        .pte_entry              = prot_none_pte_entry,
        .hugetlb_entry          = prot_none_hugetlb_entry,
        .test_walk              = prot_none_test,
+       .walk_lock              = PGWALK_WRLOCK,
 };
 
 int
index d3f4200..b8d3d70 100644 (file)
@@ -1193,7 +1193,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
         * write_bandwidth = ---------------------------------------------------
         *                                          period
         *
-        * @written may have decreased due to folio_account_redirty().
+        * @written may have decreased due to folio_redirty_for_writepage().
         * Avoid underflowing @bw calculation.
         */
        bw = written - min(written, wb->written_stamp);
@@ -2712,37 +2712,6 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
 EXPORT_SYMBOL(filemap_dirty_folio);
 
 /**
- * folio_account_redirty - Manually account for redirtying a page.
- * @folio: The folio which is being redirtied.
- *
- * Most filesystems should call folio_redirty_for_writepage() instead
- * of this function.  If your filesystem is doing writeback outside the
- * context of a writeback_control(), it can call this when redirtying
- * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED,
- * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN,
- * WB_WRITTEN) in long term. The mismatches will lead to systematic errors
- * in balanced_dirty_ratelimit and the dirty pages position control.
- */
-void folio_account_redirty(struct folio *folio)
-{
-       struct address_space *mapping = folio->mapping;
-
-       if (mapping && mapping_can_writeback(mapping)) {
-               struct inode *inode = mapping->host;
-               struct bdi_writeback *wb;
-               struct wb_lock_cookie cookie = {};
-               long nr = folio_nr_pages(folio);
-
-               wb = unlocked_inode_to_wb_begin(inode, &cookie);
-               current->nr_dirtied -= nr;
-               node_stat_mod_folio(folio, NR_DIRTIED, -nr);
-               wb_stat_mod(wb, WB_DIRTIED, -nr);
-               unlocked_inode_to_wb_end(inode, &cookie);
-       }
-}
-EXPORT_SYMBOL(folio_account_redirty);
-
-/**
  * folio_redirty_for_writepage - Decline to write a dirty folio.
  * @wbc: The writeback control.
  * @folio: The folio.
@@ -2757,13 +2726,23 @@ EXPORT_SYMBOL(folio_account_redirty);
 bool folio_redirty_for_writepage(struct writeback_control *wbc,
                struct folio *folio)
 {
-       bool ret;
+       struct address_space *mapping = folio->mapping;
        long nr = folio_nr_pages(folio);
+       bool ret;
 
        wbc->pages_skipped += nr;
-       ret = filemap_dirty_folio(folio->mapping, folio);
-       folio_account_redirty(folio);
+       ret = filemap_dirty_folio(mapping, folio);
+       if (mapping && mapping_can_writeback(mapping)) {
+               struct inode *inode = mapping->host;
+               struct bdi_writeback *wb;
+               struct wb_lock_cookie cookie = {};
 
+               wb = unlocked_inode_to_wb_begin(inode, &cookie);
+               current->nr_dirtied -= nr;
+               node_stat_mod_folio(folio, NR_DIRTIED, -nr);
+               wb_stat_mod(wb, WB_DIRTIED, -nr);
+               unlocked_inode_to_wb_end(inode, &cookie);
+       }
        return ret;
 }
 EXPORT_SYMBOL(folio_redirty_for_writepage);
index 6443710..9b2d23f 100644 (file)
@@ -48,8 +48,11 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        if (walk->no_vma) {
                /*
                 * pte_offset_map() might apply user-specific validation.
+                * Indeed, on x86_64 the pmd entries set up by init_espfix_ap()
+                * fit its pmd_bad() check (_PAGE_NX set and _PAGE_RW clear),
+                * and CONFIG_EFI_PGT_DUMP efi_mm goes so far as to walk them.
                 */
-               if (walk->mm == &init_mm)
+               if (walk->mm == &init_mm || addr >= TASK_SIZE)
                        pte = pte_offset_kernel(pmd, addr);
                else
                        pte = pte_offset_map(pmd, addr);
@@ -397,6 +400,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
        return err;
 }
 
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+                                       enum page_walk_lock walk_lock)
+{
+       if (walk_lock == PGWALK_RDLOCK)
+               mmap_assert_locked(mm);
+       else
+               mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+                                        enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+       switch (walk_lock) {
+       case PGWALK_WRLOCK:
+               vma_start_write(vma);
+               break;
+       case PGWALK_WRLOCK_VERIFY:
+               vma_assert_write_locked(vma);
+               break;
+       case PGWALK_RDLOCK:
+               /* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+               break;
+       }
+#endif
+}
+
 /**
  * walk_page_range - walk page table with caller specific callbacks
  * @mm:                mm_struct representing the target process of page table walk
@@ -456,7 +486,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
        if (!walk.mm)
                return -EINVAL;
 
-       mmap_assert_locked(walk.mm);
+       process_mm_walk_lock(walk.mm, ops->walk_lock);
 
        vma = find_vma(walk.mm, start);
        do {
@@ -471,6 +501,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
                        if (ops->pte_hole)
                                err = ops->pte_hole(start, next, -1, &walk);
                } else { /* inside vma */
+                       process_vma_walk_lock(vma, ops->walk_lock);
                        walk.vma = vma;
                        next = min(end, vma->vm_end);
                        vma = find_vma(mm, vma->vm_end);
@@ -546,7 +577,8 @@ int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
        if (start < vma->vm_start || end > vma->vm_end)
                return -EINVAL;
 
-       mmap_assert_locked(walk.mm);
+       process_mm_walk_lock(walk.mm, ops->walk_lock);
+       process_vma_walk_lock(vma, ops->walk_lock);
        return __walk_page_range(start, end, &walk);
 }
 
@@ -563,7 +595,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
        if (!walk.mm)
                return -EINVAL;
 
-       mmap_assert_locked(walk.mm);
+       process_mm_walk_lock(walk.mm, ops->walk_lock);
+       process_vma_walk_lock(vma, ops->walk_lock);
        return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
 
diff --git a/mm/readahead.c b/mm/readahead.c
index a9c999a..e815c11 100644 (file)
@@ -461,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping,
        return 1;
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER    HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER    8
-#endif
-
 static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
                pgoff_t mark, unsigned int order, gfp_t gfp)
 {
diff --git a/mm/shmem.c b/mm/shmem.c
index 2f2e0e6..479d1ce 100644 (file)
@@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
 #include <uapi/linux/memfd.h>
 #include <linux/rmap.h>
 #include <linux/uuid.h>
+#include <linux/quotaops.h>
 
 #include <linux/uaccess.h>
 
@@ -89,6 +90,9 @@ static struct vfsmount *shm_mnt;
 /* Pretend that each entry is of this size in directory's i_size */
 #define BOGO_DIRENT_SIZE 20
 
+/* Pretend that one inode + its dentry occupy this much memory */
+#define BOGO_INODE_SIZE 1024
+
 /* Symlink up to this size is kmalloc'ed instead of using a swappable page */
 #define SHORT_SYMLINK_LEN 128
 
@@ -116,11 +120,14 @@ struct shmem_options {
        int huge;
        int seen;
        bool noswap;
+       unsigned short quota_types;
+       struct shmem_quota_limits qlimits;
 #define SHMEM_SEEN_BLOCKS 1
 #define SHMEM_SEEN_INODES 2
 #define SHMEM_SEEN_HUGE 4
 #define SHMEM_SEEN_INUMS 8
 #define SHMEM_SEEN_NOSWAP 16
+#define SHMEM_SEEN_QUOTA 32
 };
 
 #ifdef CONFIG_TMPFS
@@ -133,7 +140,8 @@ static unsigned long shmem_default_max_inodes(void)
 {
        unsigned long nr_pages = totalram_pages();
 
-       return min(nr_pages - totalhigh_pages(), nr_pages / 2);
+       return min3(nr_pages - totalhigh_pages(), nr_pages / 2,
+                       ULONG_MAX / BOGO_INODE_SIZE);
 }
 #endif
 
@@ -199,33 +207,47 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
                vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
 }
 
-static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
+static int shmem_inode_acct_block(struct inode *inode, long pages)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+       int err = -ENOSPC;
 
        if (shmem_acct_block(info->flags, pages))
-               return false;
+               return err;
 
+       might_sleep();  /* when quotas */
        if (sbinfo->max_blocks) {
                if (percpu_counter_compare(&sbinfo->used_blocks,
                                           sbinfo->max_blocks - pages) > 0)
                        goto unacct;
+
+               err = dquot_alloc_block_nodirty(inode, pages);
+               if (err)
+                       goto unacct;
+
                percpu_counter_add(&sbinfo->used_blocks, pages);
+       } else {
+               err = dquot_alloc_block_nodirty(inode, pages);
+               if (err)
+                       goto unacct;
        }
 
-       return true;
+       return 0;
 
 unacct:
        shmem_unacct_blocks(info->flags, pages);
-       return false;
+       return err;
 }
 
-static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
+static void shmem_inode_unacct_blocks(struct inode *inode, long pages)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
 
+       might_sleep();  /* when quotas */
+       dquot_free_block_nodirty(inode, pages);
+
        if (sbinfo->max_blocks)
                percpu_counter_sub(&sbinfo->used_blocks, pages);
        shmem_unacct_blocks(info->flags, pages);
@@ -254,6 +276,47 @@ bool vma_is_shmem(struct vm_area_struct *vma)
 static LIST_HEAD(shmem_swaplist);
 static DEFINE_MUTEX(shmem_swaplist_mutex);
 
+#ifdef CONFIG_TMPFS_QUOTA
+
+static int shmem_enable_quotas(struct super_block *sb,
+                              unsigned short quota_types)
+{
+       int type, err = 0;
+
+       sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+       for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
+               if (!(quota_types & (1 << type)))
+                       continue;
+               err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
+                                         DQUOT_USAGE_ENABLED |
+                                         DQUOT_LIMITS_ENABLED);
+               if (err)
+                       goto out_err;
+       }
+       return 0;
+
+out_err:
+       pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
+               type, err);
+       for (type--; type >= 0; type--)
+               dquot_quota_off(sb, type);
+       return err;
+}
+
+static void shmem_disable_quotas(struct super_block *sb)
+{
+       int type;
+
+       for (type = 0; type < SHMEM_MAXQUOTAS; type++)
+               dquot_quota_off(sb, type);
+}
+
+static struct dquot **shmem_get_dquots(struct inode *inode)
+{
+       return SHMEM_I(inode)->i_dquot;
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
 /*
  * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
  * produces a novel ino for the newly allocated inode.
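
shmem_enable_quotas() above uses the standard partial-failure rollback: if enabling quota type N fails, walk back through types N-1..0 and switch off everything that did succeed. The shape of that loop in isolation (the enable/disable pair is hypothetical; only the loop structure mirrors the patch):

    #include <stdio.h>

    #define NTYPES 3

    static int enable(int type)
    {
            printf("enable %d\n", type);
            return type == 2 ? -1 : 0;      /* pretend the last type fails */
    }

    static void disable(int type)
    {
            printf("disable %d\n", type);
    }

    int main(void)
    {
            int type, err = 0;

            for (type = 0; type < NTYPES; type++) {
                    err = enable(type);
                    if (err)
                            goto out_err;
            }
            return 0;

    out_err:
            /* 'type' failed: undo every earlier success, newest first */
            for (type--; type >= 0; type--)
                    disable(type);
            return 1;
    }
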
@@ -272,11 +335,11 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
        if (!(sb->s_flags & SB_KERNMOUNT)) {
                raw_spin_lock(&sbinfo->stat_lock);
                if (sbinfo->max_inodes) {
-                       if (!sbinfo->free_inodes) {
+                       if (sbinfo->free_ispace < BOGO_INODE_SIZE) {
                                raw_spin_unlock(&sbinfo->stat_lock);
                                return -ENOSPC;
                        }
-                       sbinfo->free_inodes--;
+                       sbinfo->free_ispace -= BOGO_INODE_SIZE;
                }
                if (inop) {
                        ino = sbinfo->next_ino++;
@@ -330,12 +393,12 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
        return 0;
 }
 
-static void shmem_free_inode(struct super_block *sb)
+static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
        if (sbinfo->max_inodes) {
                raw_spin_lock(&sbinfo->stat_lock);
-               sbinfo->free_inodes++;
+               sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace;
                raw_spin_unlock(&sbinfo->stat_lock);
        }
 }
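
With free_inodes replaced by a byte-denominated free_ispace pool, reserving an inode charges BOGO_INODE_SIZE (1024) and eviction refunds that charge plus whatever xattr bytes the inode had accounted. A stand-alone sketch of the arithmetic (only BOGO_INODE_SIZE and the charge/refund rule come from the patch; the driver values are invented):

    #include <stddef.h>
    #include <stdio.h>

    #define BOGO_INODE_SIZE 1024UL  /* pretended cost of one inode + dentry */

    static unsigned long free_ispace;

    static int reserve_inode(void)
    {
            if (free_ispace < BOGO_INODE_SIZE)
                    return -1;      /* -ENOSPC in the kernel */
            free_ispace -= BOGO_INODE_SIZE;
            return 0;
    }

    static void free_inode(size_t freed_xattr_bytes)
    {
            free_ispace += BOGO_INODE_SIZE + freed_xattr_bytes;
    }

    int main(void)
    {
            free_ispace = 10 * BOGO_INODE_SIZE;     /* "nr_inodes=10" */

            reserve_inode();
            printf("after reserve: %lu\n", free_ispace);    /* 9216 */
            free_inode(512);        /* the inode carried 512 bytes of xattrs */
            printf("after evict:   %lu\n", free_ispace);    /* 10752 */
            return 0;
    }
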
@@ -343,62 +406,65 @@ static void shmem_free_inode(struct super_block *sb)
 /**
  * shmem_recalc_inode - recalculate the block usage of an inode
  * @inode: inode to recalc
+ * @alloced: the change in number of pages allocated to inode
+ * @swapped: the change in number of pages swapped from inode
  *
  * We have to calculate the free blocks since the mm can drop
  * undirtied hole pages behind our back.
  *
  * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
  * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
  */
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
        long freed;
 
-       freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
-       if (freed > 0) {
+       spin_lock(&info->lock);
+       info->alloced += alloced;
+       info->swapped += swapped;
+       freed = info->alloced - info->swapped -
+               READ_ONCE(inode->i_mapping->nrpages);
+       /*
+        * Special case: whereas normally shmem_recalc_inode() is called
+        * after i_mapping->nrpages has already been adjusted (up or down),
+        * shmem_writepage() has to raise swapped before nrpages is lowered -
+        * to stop a racing shmem_recalc_inode() from thinking that a page has
+        * been freed.  Compensate here, to avoid the need for a followup call.
+        */
+       if (swapped > 0)
+               freed += swapped;
+       if (freed > 0)
                info->alloced -= freed;
-               inode->i_blocks -= freed * BLOCKS_PER_PAGE;
+       spin_unlock(&info->lock);
+
+       /* The quota case may block */
+       if (freed > 0)
                shmem_inode_unacct_blocks(inode, freed);
-       }
 }
 
 bool shmem_charge(struct inode *inode, long pages)
 {
-       struct shmem_inode_info *info = SHMEM_I(inode);
-       unsigned long flags;
+       struct address_space *mapping = inode->i_mapping;
 
-       if (!shmem_inode_acct_block(inode, pages))
+       if (shmem_inode_acct_block(inode, pages))
                return false;
 
        /* nrpages adjustment first, then shmem_recalc_inode() when balanced */
-       inode->i_mapping->nrpages += pages;
-
-       spin_lock_irqsave(&info->lock, flags);
-       info->alloced += pages;
-       inode->i_blocks += pages * BLOCKS_PER_PAGE;
-       shmem_recalc_inode(inode);
-       spin_unlock_irqrestore(&info->lock, flags);
+       xa_lock_irq(&mapping->i_pages);
+       mapping->nrpages += pages;
+       xa_unlock_irq(&mapping->i_pages);
 
+       shmem_recalc_inode(inode, pages, 0);
        return true;
 }
 
 void shmem_uncharge(struct inode *inode, long pages)
 {
-       struct shmem_inode_info *info = SHMEM_I(inode);
-       unsigned long flags;
-
+       /* pages argument is currently unused: keep it to help debugging */
        /* nrpages adjustment done by __filemap_remove_folio() or caller */
 
-       spin_lock_irqsave(&info->lock, flags);
-       info->alloced -= pages;
-       inode->i_blocks -= pages * BLOCKS_PER_PAGE;
-       shmem_recalc_inode(inode);
-       spin_unlock_irqrestore(&info->lock, flags);
-
-       shmem_inode_unacct_blocks(inode, pages);
+       shmem_recalc_inode(inode, 0, 0);
 }
 
 /*
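
The writepage compensation above is easiest to see with numbers: shmem_writepage() raises swapped while nrpages still counts the page, so the raw freed computation would land at -1 and a racing recalc could treat a still-present page as freed. A small model of the calculation (values invented):

    #include <stdio.h>

    /* Models the freed computation in shmem_recalc_inode(). */
    static long recalc_freed(long alloced, long swapped, long nrpages,
                             long swapped_delta)
    {
            long freed = alloced - swapped - nrpages;

            /* writepage raised swapped before nrpages dropped: compensate */
            if (swapped_delta > 0)
                    freed += swapped_delta;
            return freed;
    }

    int main(void)
    {
            /* 8 pages alloced, one just swapped, page cache not yet shrunk */
            printf("freed = %ld\n", recalc_freed(8, 1, 8, 1));      /* 0 */
            return 0;
    }
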
@@ -806,14 +872,16 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
        XA_STATE(xas, &mapping->i_pages, start);
        struct page *page;
        unsigned long swapped = 0;
+       unsigned long max = end - 1;
 
        rcu_read_lock();
-       xas_for_each(&xas, page, end - 1) {
+       xas_for_each(&xas, page, max) {
                if (xas_retry(&xas, page))
                        continue;
                if (xa_is_value(page))
                        swapped++;
-
+               if (xas.xa_index == max)
+                       break;
                if (need_resched()) {
                        xas_pause(&xas);
                        cond_resched_rcu();
@@ -1038,16 +1106,13 @@ whole_folios:
                folio_batch_release(&fbatch);
        }
 
-       spin_lock_irq(&info->lock);
-       info->swapped -= nr_swaps_freed;
-       shmem_recalc_inode(inode);
-       spin_unlock_irq(&info->lock);
+       shmem_recalc_inode(inode, 0, -nr_swaps_freed);
 }
 
 void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 {
        shmem_undo_range(inode, lstart, lend, false);
-       inode->i_ctime = inode->i_mtime = current_time(inode);
+       inode->i_mtime = inode_set_ctime_current(inode);
        inode_inc_iversion(inode);
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -1059,11 +1124,9 @@ static int shmem_getattr(struct mnt_idmap *idmap,
        struct inode *inode = path->dentry->d_inode;
        struct shmem_inode_info *info = SHMEM_I(inode);
 
-       if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
-               spin_lock_irq(&info->lock);
-               shmem_recalc_inode(inode);
-               spin_unlock_irq(&info->lock);
-       }
+       if (info->alloced - info->swapped != inode->i_mapping->nrpages)
+               shmem_recalc_inode(inode, 0, 0);
+
        if (info->fsflags & FS_APPEND_FL)
                stat->attributes |= STATX_ATTR_APPEND;
        if (info->fsflags & FS_IMMUTABLE_FL)
@@ -1073,7 +1136,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
        stat->attributes_mask |= (STATX_ATTR_APPEND |
                        STATX_ATTR_IMMUTABLE |
                        STATX_ATTR_NODUMP);
-       generic_fillattr(idmap, inode, stat);
+       generic_fillattr(idmap, request_mask, inode, stat);
 
        if (shmem_is_huge(inode, 0, false, NULL, 0))
                stat->blksize = HPAGE_PMD_SIZE;
@@ -1140,13 +1203,28 @@ static int shmem_setattr(struct mnt_idmap *idmap,
                }
        }
 
+       if (is_quota_modification(idmap, inode, attr)) {
+               error = dquot_initialize(inode);
+               if (error)
+                       return error;
+       }
+
+       /* Transfer quota accounting */
+       if (i_uid_needs_update(idmap, attr, inode) ||
+           i_gid_needs_update(idmap, attr, inode)) {
+               error = dquot_transfer(idmap, inode, attr);
+
+               if (error)
+                       return error;
+       }
+
        setattr_copy(idmap, inode, attr);
        if (attr->ia_valid & ATTR_MODE)
                error = posix_acl_chmod(idmap, dentry, inode->i_mode);
        if (!error && update_ctime) {
-               inode->i_ctime = current_time(inode);
+               inode_set_ctime_current(inode);
                if (update_mtime)
-                       inode->i_mtime = inode->i_ctime;
+                       inode->i_mtime = inode_get_ctime(inode);
                inode_inc_iversion(inode);
        }
        return error;
@@ -1156,6 +1234,7 @@ static void shmem_evict_inode(struct inode *inode)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+       size_t freed = 0;
 
        if (shmem_mapping(inode->i_mapping)) {
                shmem_unacct_size(info->flags, inode->i_size);
@@ -1182,10 +1261,14 @@ static void shmem_evict_inode(struct inode *inode)
                }
        }
 
-       simple_xattrs_free(&info->xattrs);
+       simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+       shmem_free_inode(inode->i_sb, freed);
        WARN_ON(inode->i_blocks);
-       shmem_free_inode(inode->i_sb);
        clear_inode(inode);
+#ifdef CONFIG_TMPFS_QUOTA
+       dquot_free_inode(inode);
+       dquot_drop(inode);
+#endif
 }
 
 static int shmem_find_swap_entries(struct address_space *mapping,
@@ -1429,11 +1512,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
        if (add_to_swap_cache(folio, swap,
                        __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
                        NULL) == 0) {
-               spin_lock_irq(&info->lock);
-               shmem_recalc_inode(inode);
-               info->swapped++;
-               spin_unlock_irq(&info->lock);
-
+               shmem_recalc_inode(inode, 0, 1);
                swap_shmem_alloc(swap);
                shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
 
@@ -1588,13 +1667,14 @@ static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct folio *folio;
        int nr;
-       int err = -ENOSPC;
+       int err;
 
        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                huge = false;
        nr = huge ? HPAGE_PMD_NR : 1;
 
-       if (!shmem_inode_acct_block(inode, nr))
+       err = shmem_inode_acct_block(inode, nr);
+       if (err)
                goto failed;
 
        if (huge)
@@ -1703,7 +1783,6 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
                                         struct folio *folio, swp_entry_t swap)
 {
        struct address_space *mapping = inode->i_mapping;
-       struct shmem_inode_info *info = SHMEM_I(inode);
        swp_entry_t swapin_error;
        void *old;
 
@@ -1716,16 +1795,12 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 
        folio_wait_writeback(folio);
        delete_from_swap_cache(folio);
-       spin_lock_irq(&info->lock);
        /*
-        * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks won't
-        * be 0 when inode is released and thus trigger WARN_ON(inode->i_blocks) in
-        * shmem_evict_inode.
+        * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
+        * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
+        * in shmem_evict_inode().
         */
-       info->alloced--;
-       info->swapped--;
-       shmem_recalc_inode(inode);
-       spin_unlock_irq(&info->lock);
+       shmem_recalc_inode(inode, -1, -1);
        swap_free(swap);
 }
 
@@ -1812,10 +1887,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
        if (error)
                goto failed;
 
-       spin_lock_irq(&info->lock);
-       info->swapped--;
-       shmem_recalc_inode(inode);
-       spin_unlock_irq(&info->lock);
+       shmem_recalc_inode(inode, 0, -1);
 
        if (sgp == SGP_WRITE)
                folio_mark_accessed(folio);
@@ -1980,13 +2052,9 @@ alloc_nohuge:
                                        charge_mm);
        if (error)
                goto unacct;
-       folio_add_lru(folio);
 
-       spin_lock_irq(&info->lock);
-       info->alloced += folio_nr_pages(folio);
-       inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
-       shmem_recalc_inode(inode);
-       spin_unlock_irq(&info->lock);
+       folio_add_lru(folio);
+       shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
        alloced = true;
 
        if (folio_test_pmd_mappable(folio) &&
@@ -2035,9 +2103,7 @@ clear:
                if (alloced) {
                        folio_clear_dirty(folio);
                        filemap_remove_folio(folio);
-                       spin_lock_irq(&info->lock);
-                       shmem_recalc_inode(inode);
-                       spin_unlock_irq(&info->lock);
+                       shmem_recalc_inode(inode, 0, 0);
                }
                error = -EINVAL;
                goto unlock;
@@ -2063,9 +2129,7 @@ unlock:
                folio_put(folio);
        }
        if (error == -ENOSPC && !once++) {
-               spin_lock_irq(&info->lock);
-               shmem_recalc_inode(inode);
-               spin_unlock_irq(&info->lock);
+               shmem_recalc_inode(inode, 0, 0);
                goto repeat;
        }
        if (error == -EEXIST)
@@ -2326,6 +2390,12 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
+static int shmem_file_open(struct inode *inode, struct file *file)
+{
+       file->f_mode |= FMODE_CAN_ODIRECT;
+       return generic_file_open(inode, file);
+}
+
 #ifdef CONFIG_TMPFS_XATTR
 static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
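
With FMODE_CAN_ODIRECT set in shmem_file_open(), an O_DIRECT open on tmpfs now succeeds where it previously failed with EINVAL. A quick userspace check (the path is invented):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            int fd = open("/dev/shm/scratch", O_RDWR | O_CREAT | O_DIRECT, 0600);

            if (fd < 0) {
                    perror("open(O_DIRECT)");       /* EINVAL before this change */
                    return 1;
            }
            close(fd);
            return 0;
    }
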
 
@@ -2355,77 +2425,126 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
 #define shmem_initxattrs NULL
 #endif
 
-static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
-                                    struct inode *dir, umode_t mode, dev_t dev,
-                                    unsigned long flags)
+static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
+{
+       return &SHMEM_I(inode)->dir_offsets;
+}
+
+static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
+                                            struct super_block *sb,
+                                            struct inode *dir, umode_t mode,
+                                            dev_t dev, unsigned long flags)
 {
        struct inode *inode;
        struct shmem_inode_info *info;
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
        ino_t ino;
+       int err;
+
+       err = shmem_reserve_inode(sb, &ino);
+       if (err)
+               return ERR_PTR(err);
 
-       if (shmem_reserve_inode(sb, &ino))
-               return NULL;
 
        inode = new_inode(sb);
-       if (inode) {
-               inode->i_ino = ino;
-               inode_init_owner(idmap, inode, dir, mode);
-               inode->i_blocks = 0;
-               inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
-               inode->i_generation = get_random_u32();
-               info = SHMEM_I(inode);
-               memset(info, 0, (char *)inode - (char *)info);
-               spin_lock_init(&info->lock);
-               atomic_set(&info->stop_eviction, 0);
-               info->seals = F_SEAL_SEAL;
-               info->flags = flags & VM_NORESERVE;
-               info->i_crtime = inode->i_mtime;
-               info->fsflags = (dir == NULL) ? 0 :
-                       SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
-               if (info->fsflags)
-                       shmem_set_inode_flags(inode, info->fsflags);
-               INIT_LIST_HEAD(&info->shrinklist);
-               INIT_LIST_HEAD(&info->swaplist);
-               if (sbinfo->noswap)
-                       mapping_set_unevictable(inode->i_mapping);
-               simple_xattrs_init(&info->xattrs);
-               cache_no_acl(inode);
-               mapping_set_large_folios(inode->i_mapping);
-
-               switch (mode & S_IFMT) {
-               default:
-                       inode->i_op = &shmem_special_inode_operations;
-                       init_special_inode(inode, mode, dev);
-                       break;
-               case S_IFREG:
-                       inode->i_mapping->a_ops = &shmem_aops;
-                       inode->i_op = &shmem_inode_operations;
-                       inode->i_fop = &shmem_file_operations;
-                       mpol_shared_policy_init(&info->policy,
-                                                shmem_get_sbmpol(sbinfo));
-                       break;
-               case S_IFDIR:
-                       inc_nlink(inode);
-                       /* Some things misbehave if size == 0 on a directory */
-                       inode->i_size = 2 * BOGO_DIRENT_SIZE;
-                       inode->i_op = &shmem_dir_inode_operations;
-                       inode->i_fop = &simple_dir_operations;
-                       break;
-               case S_IFLNK:
-                       /*
-                        * Must not load anything in the rbtree,
-                        * mpol_free_shared_policy will not be called.
-                        */
-                       mpol_shared_policy_init(&info->policy, NULL);
-                       break;
-               }
+       if (!inode) {
+               shmem_free_inode(sb, 0);
+               return ERR_PTR(-ENOSPC);
+       }
+
+       inode->i_ino = ino;
+       inode_init_owner(idmap, inode, dir, mode);
+       inode->i_blocks = 0;
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+       inode->i_generation = get_random_u32();
+       info = SHMEM_I(inode);
+       memset(info, 0, (char *)inode - (char *)info);
+       spin_lock_init(&info->lock);
+       atomic_set(&info->stop_eviction, 0);
+       info->seals = F_SEAL_SEAL;
+       info->flags = flags & VM_NORESERVE;
+       info->i_crtime = inode->i_mtime;
+       info->fsflags = (dir == NULL) ? 0 :
+               SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
+       if (info->fsflags)
+               shmem_set_inode_flags(inode, info->fsflags);
+       INIT_LIST_HEAD(&info->shrinklist);
+       INIT_LIST_HEAD(&info->swaplist);
+       if (sbinfo->noswap)
+               mapping_set_unevictable(inode->i_mapping);
+       simple_xattrs_init(&info->xattrs);
+       cache_no_acl(inode);
+       mapping_set_large_folios(inode->i_mapping);
+
+       switch (mode & S_IFMT) {
+       default:
+               inode->i_op = &shmem_special_inode_operations;
+               init_special_inode(inode, mode, dev);
+               break;
+       case S_IFREG:
+               inode->i_mapping->a_ops = &shmem_aops;
+               inode->i_op = &shmem_inode_operations;
+               inode->i_fop = &shmem_file_operations;
+               mpol_shared_policy_init(&info->policy,
+                                        shmem_get_sbmpol(sbinfo));
+               break;
+       case S_IFDIR:
+               inc_nlink(inode);
+               /* Some things misbehave if size == 0 on a directory */
+               inode->i_size = 2 * BOGO_DIRENT_SIZE;
+               inode->i_op = &shmem_dir_inode_operations;
+               inode->i_fop = &simple_offset_dir_operations;
+               simple_offset_init(shmem_get_offset_ctx(inode));
+               break;
+       case S_IFLNK:
+               /*
+                * Must not load anything in the rbtree,
+                * mpol_free_shared_policy will not be called.
+                */
+               mpol_shared_policy_init(&info->policy, NULL);
+               break;
+       }
+
+       lockdep_annotate_inode_mutex_key(inode);
+       return inode;
+}
 
-               lockdep_annotate_inode_mutex_key(inode);
-       } else
-               shmem_free_inode(sb);
+#ifdef CONFIG_TMPFS_QUOTA
+static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+                                    struct super_block *sb, struct inode *dir,
+                                    umode_t mode, dev_t dev, unsigned long flags)
+{
+       int err;
+       struct inode *inode;
+
+       inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+       if (IS_ERR(inode))
+               return inode;
+
+       err = dquot_initialize(inode);
+       if (err)
+               goto errout;
+
+       err = dquot_alloc_inode(inode);
+       if (err) {
+               dquot_drop(inode);
+               goto errout;
+       }
        return inode;
+
+errout:
+       inode->i_flags |= S_NOQUOTA;
+       iput(inode);
+       return ERR_PTR(err);
+}
+#else
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+                                    struct super_block *sb, struct inode *dir,
+                                    umode_t mode, dev_t dev, unsigned long flags)
+{
+       return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
 }
+#endif /* CONFIG_TMPFS_QUOTA */
 
 #ifdef CONFIG_USERFAULTFD
 int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
@@ -2445,7 +2565,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
        int ret;
        pgoff_t max_off;
 
-       if (!shmem_inode_acct_block(inode, 1)) {
+       if (shmem_inode_acct_block(inode, 1)) {
                /*
                 * We may have got a page, returned -ENOENT triggering a retry,
                 * and now we find ourselves with -ENOMEM. Release the page, to
@@ -2527,12 +2647,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
        if (ret)
                goto out_delete_from_cache;
 
-       spin_lock_irq(&info->lock);
-       info->alloced++;
-       inode->i_blocks += BLOCKS_PER_PAGE;
-       shmem_recalc_inode(inode);
-       spin_unlock_irq(&info->lock);
-
+       shmem_recalc_inode(inode, 1, 0);
        folio_unlock(folio);
        return 0;
 out_delete_from_cache:
@@ -2731,6 +2846,28 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return retval ? retval : error;
 }
 
+static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file->f_mapping->host;
+       ssize_t ret;
+
+       inode_lock(inode);
+       ret = generic_write_checks(iocb, from);
+       if (ret <= 0)
+               goto unlock;
+       ret = file_remove_privs(file);
+       if (ret)
+               goto unlock;
+       ret = file_update_time(file);
+       if (ret)
+               goto unlock;
+       ret = generic_perform_write(iocb, from);
+unlock:
+       inode_unlock(inode);
+       return ret;
+}
+
 static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
                              struct pipe_buffer *buf)
 {
@@ -2796,7 +2933,8 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
                if (*ppos >= i_size_read(inode))
                        break;
 
-               error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
+               error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
+                                       SGP_READ);
                if (error) {
                        if (error == -EINVAL)
                                error = 0;
@@ -2805,7 +2943,9 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
                if (folio) {
                        folio_unlock(folio);
 
-                       if (folio_test_hwpoison(folio)) {
+                       if (folio_test_hwpoison(folio) ||
+                           (folio_test_large(folio) &&
+                            folio_test_has_hwpoisoned(folio))) {
                                error = -EIO;
                                break;
                        }
@@ -2841,7 +2981,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
                        folio_put(folio);
                        folio = NULL;
                } else {
-                       n = splice_zeropage_into_pipe(pipe, *ppos, len);
+                       n = splice_zeropage_into_pipe(pipe, *ppos, part);
                }
 
                if (!n)
@@ -3052,7 +3192,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
        }
        if (sbinfo->max_inodes) {
                buf->f_files = sbinfo->max_inodes;
-               buf->f_ffree = sbinfo->free_inodes;
+               buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE;
        }
        /* else leave those fields 0 like simple_statfs */
 
@@ -3069,27 +3209,32 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
            struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode *inode;
-       int error = -ENOSPC;
+       int error;
 
        inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
-       if (inode) {
-               error = simple_acl_create(dir, inode);
-               if (error)
-                       goto out_iput;
-               error = security_inode_init_security(inode, dir,
-                                                    &dentry->d_name,
-                                                    shmem_initxattrs, NULL);
-               if (error && error != -EOPNOTSUPP)
-                       goto out_iput;
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
 
-               error = 0;
-               dir->i_size += BOGO_DIRENT_SIZE;
-               dir->i_ctime = dir->i_mtime = current_time(dir);
-               inode_inc_iversion(dir);
-               d_instantiate(dentry, inode);
-               dget(dentry); /* Extra count - pin the dentry in core */
-       }
+       error = simple_acl_create(dir, inode);
+       if (error)
+               goto out_iput;
+       error = security_inode_init_security(inode, dir,
+                                            &dentry->d_name,
+                                            shmem_initxattrs, NULL);
+       if (error && error != -EOPNOTSUPP)
+               goto out_iput;
+
+       error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+       if (error)
+               goto out_iput;
+
+       dir->i_size += BOGO_DIRENT_SIZE;
+       dir->i_mtime = inode_set_ctime_current(dir);
+       inode_inc_iversion(dir);
+       d_instantiate(dentry, inode);
+       dget(dentry); /* Extra count - pin the dentry in core */
        return error;
+
 out_iput:
        iput(inode);
        return error;
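
shmem_get_inode() and its callers here move from NULL-on-failure to the kernel's ERR_PTR convention, so -ENOSPC, -ENOMEM and quota errors propagate distinctly instead of all collapsing to -ENOSPC. A minimal userspace rendering of the idiom (the three macros are re-derived for illustration; in the kernel they come from <linux/err.h>):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_ERRNO       4095
    #define ERR_PTR(err)    ((void *)(long)(err))
    #define PTR_ERR(ptr)    ((long)(ptr))
    #define IS_ERR(ptr)     ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

    static void *get_inode(int fail)
    {
            if (fail)
                    return ERR_PTR(-ENOSPC);        /* was: return NULL */
            return malloc(64);
    }

    int main(void)
    {
            void *inode = get_inode(1);

            if (IS_ERR(inode)) {                    /* was: if (!inode) */
                    printf("error %ld\n", PTR_ERR(inode));
                    return 1;
            }
            free(inode);
            return 0;
    }
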
@@ -3100,20 +3245,26 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
              struct file *file, umode_t mode)
 {
        struct inode *inode;
-       int error = -ENOSPC;
+       int error;
 
        inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
-       if (inode) {
-               error = security_inode_init_security(inode, dir,
-                                                    NULL,
-                                                    shmem_initxattrs, NULL);
-               if (error && error != -EOPNOTSUPP)
-                       goto out_iput;
-               error = simple_acl_create(dir, inode);
-               if (error)
-                       goto out_iput;
-               d_tmpfile(file, inode);
+
+       if (IS_ERR(inode)) {
+               error = PTR_ERR(inode);
+               goto err_out;
        }
+
+       error = security_inode_init_security(inode, dir,
+                                            NULL,
+                                            shmem_initxattrs, NULL);
+       if (error && error != -EOPNOTSUPP)
+               goto out_iput;
+       error = simple_acl_create(dir, inode);
+       if (error)
+               goto out_iput;
+       d_tmpfile(file, inode);
+
+err_out:
        return finish_open_simple(file, error);
 out_iput:
        iput(inode);
@@ -3159,8 +3310,16 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
                        goto out;
        }
 
+       ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+       if (ret) {
+               if (inode->i_nlink)
+                       shmem_free_inode(inode->i_sb, 0);
+               goto out;
+       }
+
        dir->i_size += BOGO_DIRENT_SIZE;
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+       dir->i_mtime = inode_set_ctime_to_ts(dir,
+                                            inode_set_ctime_current(inode));
        inode_inc_iversion(dir);
        inc_nlink(inode);
        ihold(inode);   /* New dentry reference */
@@ -3175,10 +3334,13 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
        struct inode *inode = d_inode(dentry);
 
        if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
-               shmem_free_inode(inode->i_sb);
+               shmem_free_inode(inode->i_sb, 0);
+
+       simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
 
        dir->i_size -= BOGO_DIRENT_SIZE;
-       inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+       dir->i_mtime = inode_set_ctime_to_ts(dir,
+                                            inode_set_ctime_current(inode));
        inode_inc_iversion(dir);
        drop_nlink(inode);
        dput(dentry);   /* Undo the count from "create" - this does all the work */
@@ -3235,24 +3397,29 @@ static int shmem_rename2(struct mnt_idmap *idmap,
 {
        struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = S_ISDIR(inode->i_mode);
+       int error;
 
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
 
        if (flags & RENAME_EXCHANGE)
-               return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+               return simple_offset_rename_exchange(old_dir, old_dentry,
+                                                    new_dir, new_dentry);
 
        if (!simple_empty(new_dentry))
                return -ENOTEMPTY;
 
        if (flags & RENAME_WHITEOUT) {
-               int error;
-
                error = shmem_whiteout(idmap, old_dir, old_dentry);
                if (error)
                        return error;
        }
 
+       simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
+       error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
+       if (error)
+               return error;
+
        if (d_really_is_positive(new_dentry)) {
                (void) shmem_unlink(new_dir, new_dentry);
                if (they_are_dirs) {
@@ -3266,9 +3433,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
 
        old_dir->i_size -= BOGO_DIRENT_SIZE;
        new_dir->i_size += BOGO_DIRENT_SIZE;
-       old_dir->i_ctime = old_dir->i_mtime =
-       new_dir->i_ctime = new_dir->i_mtime =
-       inode->i_ctime = current_time(old_dir);
+       simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
        inode_inc_iversion(old_dir);
        inode_inc_iversion(new_dir);
        return 0;
@@ -3288,31 +3453,32 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
 
        inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
                                VM_NORESERVE);
-       if (!inode)
-               return -ENOSPC;
+
+       if (IS_ERR(inode))
+               return PTR_ERR(inode);
 
        error = security_inode_init_security(inode, dir, &dentry->d_name,
                                             shmem_initxattrs, NULL);
-       if (error && error != -EOPNOTSUPP) {
-               iput(inode);
-               return error;
-       }
+       if (error && error != -EOPNOTSUPP)
+               goto out_iput;
+
+       error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+       if (error)
+               goto out_iput;
 
        inode->i_size = len-1;
        if (len <= SHORT_SYMLINK_LEN) {
                inode->i_link = kmemdup(symname, len, GFP_KERNEL);
                if (!inode->i_link) {
-                       iput(inode);
-                       return -ENOMEM;
+                       error = -ENOMEM;
+                       goto out_remove_offset;
                }
                inode->i_op = &shmem_short_symlink_operations;
        } else {
                inode_nohighmem(inode);
                error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
-               if (error) {
-                       iput(inode);
-                       return error;
-               }
+               if (error)
+                       goto out_remove_offset;
                inode->i_mapping->a_ops = &shmem_aops;
                inode->i_op = &shmem_symlink_inode_operations;
                memcpy(folio_address(folio), symname, len);
@@ -3322,11 +3488,17 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
                folio_put(folio);
        }
        dir->i_size += BOGO_DIRENT_SIZE;
-       dir->i_ctime = dir->i_mtime = current_time(dir);
+       dir->i_mtime = inode_set_ctime_current(dir);
        inode_inc_iversion(dir);
        d_instantiate(dentry, inode);
        dget(dentry);
        return 0;
+
+out_remove_offset:
+       simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
+out_iput:
+       iput(inode);
+       return error;
 }
 
 static void shmem_put_link(void *arg)
@@ -3394,7 +3566,7 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap,
                (fa->flags & SHMEM_FL_USER_MODIFIABLE);
 
        shmem_set_inode_flags(inode, info->fsflags);
-       inode->i_ctime = current_time(inode);
+       inode_set_ctime_current(inode);
        inode_inc_iversion(inode);
        return 0;
 }
@@ -3414,21 +3586,40 @@ static int shmem_initxattrs(struct inode *inode,
                            void *fs_info)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        const struct xattr *xattr;
        struct simple_xattr *new_xattr;
+       size_t ispace = 0;
        size_t len;
 
+       if (sbinfo->max_inodes) {
+               for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+                       ispace += simple_xattr_space(xattr->name,
+                               xattr->value_len + XATTR_SECURITY_PREFIX_LEN);
+               }
+               if (ispace) {
+                       raw_spin_lock(&sbinfo->stat_lock);
+                       if (sbinfo->free_ispace < ispace)
+                               ispace = 0;
+                       else
+                               sbinfo->free_ispace -= ispace;
+                       raw_spin_unlock(&sbinfo->stat_lock);
+                       if (!ispace)
+                               return -ENOSPC;
+               }
+       }
+
        for (xattr = xattr_array; xattr->name != NULL; xattr++) {
                new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
                if (!new_xattr)
-                       return -ENOMEM;
+                       break;
 
                len = strlen(xattr->name) + 1;
                new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
-                                         GFP_KERNEL);
+                                         GFP_KERNEL_ACCOUNT);
                if (!new_xattr->name) {
                        kvfree(new_xattr);
-                       return -ENOMEM;
+                       break;
                }
 
                memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
@@ -3439,6 +3630,16 @@ static int shmem_initxattrs(struct inode *inode,
                simple_xattr_add(&info->xattrs, new_xattr);
        }
 
+       if (xattr->name != NULL) {
+               if (ispace) {
+                       raw_spin_lock(&sbinfo->stat_lock);
+                       sbinfo->free_ispace += ispace;
+                       raw_spin_unlock(&sbinfo->stat_lock);
+               }
+               simple_xattrs_free(&info->xattrs, NULL);
+               return -ENOMEM;
+       }
+
        return 0;
 }
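
Both xattr paths follow the same shape: charge free_ispace up front under stat_lock, then refund the reservation if the allocation or set that needed it fails. The pattern in isolation (a pthread mutex stands in for the raw spinlock; sizes invented):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t stat_lock = PTHREAD_MUTEX_INITIALIZER;
    static unsigned long free_ispace = 4096;

    /* Charge 'want' bytes up front; false means -ENOSPC. */
    static bool charge(unsigned long want)
    {
            bool ok;

            pthread_mutex_lock(&stat_lock);
            ok = free_ispace >= want;
            if (ok)
                    free_ispace -= want;
            pthread_mutex_unlock(&stat_lock);
            return ok;
    }

    /* Refund a reservation whose operation failed. */
    static void refund(unsigned long bytes)
    {
            pthread_mutex_lock(&stat_lock);
            free_ispace += bytes;
            pthread_mutex_unlock(&stat_lock);
    }

    int main(void)
    {
            unsigned long ispace = 1024;
            bool alloc_failed = true;       /* pretend kmalloc of the name failed */

            if (!charge(ispace))
                    return 1;               /* -ENOSPC */
            if (alloc_failed)
                    refund(ispace);         /* give the bytes back */
            printf("free_ispace now %lu\n", free_ispace);   /* 4096 */
            return 0;
    }
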
 
@@ -3459,15 +3660,40 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
                                   size_t size, int flags)
 {
        struct shmem_inode_info *info = SHMEM_I(inode);
-       int err;
+       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+       struct simple_xattr *old_xattr;
+       size_t ispace = 0;
 
        name = xattr_full_name(handler, name);
-       err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
-       if (!err) {
-               inode->i_ctime = current_time(inode);
+       if (value && sbinfo->max_inodes) {
+               ispace = simple_xattr_space(name, size);
+               raw_spin_lock(&sbinfo->stat_lock);
+               if (sbinfo->free_ispace < ispace)
+                       ispace = 0;
+               else
+                       sbinfo->free_ispace -= ispace;
+               raw_spin_unlock(&sbinfo->stat_lock);
+               if (!ispace)
+                       return -ENOSPC;
+       }
+
+       old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+       if (!IS_ERR(old_xattr)) {
+               ispace = 0;
+               if (old_xattr && sbinfo->max_inodes)
+                       ispace = simple_xattr_space(old_xattr->name,
+                                                   old_xattr->size);
+               simple_xattr_free(old_xattr);
+               old_xattr = NULL;
+               inode_set_ctime_current(inode);
                inode_inc_iversion(inode);
        }
-       return err;
+       if (ispace) {
+               raw_spin_lock(&sbinfo->stat_lock);
+               sbinfo->free_ispace += ispace;
+               raw_spin_unlock(&sbinfo->stat_lock);
+       }
+       return PTR_ERR(old_xattr);
 }
 
 static const struct xattr_handler shmem_security_xattr_handler = {
@@ -3482,9 +3708,16 @@ static const struct xattr_handler shmem_trusted_xattr_handler = {
        .set = shmem_xattr_handler_set,
 };
 
+static const struct xattr_handler shmem_user_xattr_handler = {
+       .prefix = XATTR_USER_PREFIX,
+       .get = shmem_xattr_handler_get,
+       .set = shmem_xattr_handler_set,
+};
+
 static const struct xattr_handler *shmem_xattr_handlers[] = {
        &shmem_security_xattr_handler,
        &shmem_trusted_xattr_handler,
+       &shmem_user_xattr_handler,
        NULL
 };
 
@@ -3497,6 +3730,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
 
 static const struct inode_operations shmem_short_symlink_operations = {
        .getattr        = shmem_getattr,
+       .setattr        = shmem_setattr,
        .get_link       = simple_get_link,
 #ifdef CONFIG_TMPFS_XATTR
        .listxattr      = shmem_listxattr,
@@ -3505,6 +3739,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
 
 static const struct inode_operations shmem_symlink_inode_operations = {
        .getattr        = shmem_getattr,
+       .setattr        = shmem_setattr,
        .get_link       = shmem_get_link,
 #ifdef CONFIG_TMPFS_XATTR
        .listxattr      = shmem_listxattr,
@@ -3604,6 +3839,13 @@ enum shmem_param {
        Opt_inode32,
        Opt_inode64,
        Opt_noswap,
+       Opt_quota,
+       Opt_usrquota,
+       Opt_grpquota,
+       Opt_usrquota_block_hardlimit,
+       Opt_usrquota_inode_hardlimit,
+       Opt_grpquota_block_hardlimit,
+       Opt_grpquota_inode_hardlimit,
 };
 
 static const struct constant_table shmem_param_enums_huge[] = {
@@ -3626,6 +3868,15 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
        fsparam_flag  ("inode32",       Opt_inode32),
        fsparam_flag  ("inode64",       Opt_inode64),
        fsparam_flag  ("noswap",        Opt_noswap),
+#ifdef CONFIG_TMPFS_QUOTA
+       fsparam_flag  ("quota",         Opt_quota),
+       fsparam_flag  ("usrquota",      Opt_usrquota),
+       fsparam_flag  ("grpquota",      Opt_grpquota),
+       fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit),
+       fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit),
+       fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
+       fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
+#endif
        {}
 };
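
The new parameters can be exercised with a plain mount(2) data string; the option names are exactly the fsparam strings above, and the hardlimits go through memparse(), so K/M/G suffixes work. A quick sketch (target path and limit invented; needs CAP_SYS_ADMIN in the initial user namespace):

    #include <stdio.h>
    #include <sys/mount.h>

    int main(void)
    {
            if (mount("tmpfs", "/mnt/qtmp", "tmpfs", 0,
                      "usrquota,grpquota,usrquota_block_hardlimit=1G"))
                    perror("mount");
            return 0;
    }
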
 
@@ -3636,6 +3887,8 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
        unsigned long long size;
        char *rest;
        int opt;
+       kuid_t kuid;
+       kgid_t kgid;
 
        opt = fs_parse(fc, shmem_fs_parameters, param, &result);
        if (opt < 0)
@@ -3657,13 +3910,13 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                break;
        case Opt_nr_blocks:
                ctx->blocks = memparse(param->string, &rest);
-               if (*rest || ctx->blocks > S64_MAX)
+               if (*rest || ctx->blocks > LONG_MAX)
                        goto bad_value;
                ctx->seen |= SHMEM_SEEN_BLOCKS;
                break;
        case Opt_nr_inodes:
                ctx->inodes = memparse(param->string, &rest);
-               if (*rest)
+               if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE)
                        goto bad_value;
                ctx->seen |= SHMEM_SEEN_INODES;
                break;
@@ -3671,14 +3924,32 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                ctx->mode = result.uint_32 & 07777;
                break;
        case Opt_uid:
-               ctx->uid = make_kuid(current_user_ns(), result.uint_32);
-               if (!uid_valid(ctx->uid))
+               kuid = make_kuid(current_user_ns(), result.uint_32);
+               if (!uid_valid(kuid))
+                       goto bad_value;
+
+               /*
+                * The requested uid must be representable in the
+                * filesystem's idmapping.
+                */
+               if (!kuid_has_mapping(fc->user_ns, kuid))
                        goto bad_value;
+
+               ctx->uid = kuid;
                break;
        case Opt_gid:
-               ctx->gid = make_kgid(current_user_ns(), result.uint_32);
-               if (!gid_valid(ctx->gid))
+               kgid = make_kgid(current_user_ns(), result.uint_32);
+               if (!gid_valid(kgid))
+                       goto bad_value;
+
+               /*
+                * The requested gid must be representable in the
+                * filesystem's idmapping.
+                */
+               if (!kgid_has_mapping(fc->user_ns, kgid))
                        goto bad_value;
+
+               ctx->gid = kgid;
                break;
        case Opt_huge:
                ctx->huge = result.uint_32;
@@ -3717,6 +3988,60 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
                ctx->noswap = true;
                ctx->seen |= SHMEM_SEEN_NOSWAP;
                break;
+       case Opt_quota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+               ctx->seen |= SHMEM_SEEN_QUOTA;
+               ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
+               break;
+       case Opt_usrquota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+               ctx->seen |= SHMEM_SEEN_QUOTA;
+               ctx->quota_types |= QTYPE_MASK_USR;
+               break;
+       case Opt_grpquota:
+               if (fc->user_ns != &init_user_ns)
+                       return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+               ctx->seen |= SHMEM_SEEN_QUOTA;
+               ctx->quota_types |= QTYPE_MASK_GRP;
+               break;
+       case Opt_usrquota_block_hardlimit:
+               size = memparse(param->string, &rest);
+               if (*rest || !size)
+                       goto bad_value;
+               if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+                       return invalfc(fc,
+                                      "User quota block hardlimit too large.");
+               ctx->qlimits.usrquota_bhardlimit = size;
+               break;
+       case Opt_grpquota_block_hardlimit:
+               size = memparse(param->string, &rest);
+               if (*rest || !size)
+                       goto bad_value;
+               if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+                       return invalfc(fc,
+                                      "Group quota block hardlimit too large.");
+               ctx->qlimits.grpquota_bhardlimit = size;
+               break;
+       case Opt_usrquota_inode_hardlimit:
+               size = memparse(param->string, &rest);
+               if (*rest || !size)
+                       goto bad_value;
+               if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+                       return invalfc(fc,
+                                      "User quota inode hardlimit too large.");
+               ctx->qlimits.usrquota_ihardlimit = size;
+               break;
+       case Opt_grpquota_inode_hardlimit:
+               size = memparse(param->string, &rest);
+               if (*rest || !size)
+                       goto bad_value;
+               if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+                       return invalfc(fc,
+                                      "Group quota inode hardlimit too large.");
+               ctx->qlimits.grpquota_ihardlimit = size;
+               break;
        }
        return 0;
 
@@ -3772,21 +4097,17 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
 
 /*
  * Reconfigure a shmem filesystem.
- *
- * Note that we disallow change from limited->unlimited blocks/inodes while any
- * are in use; but we must separately disallow unlimited->limited, because in
- * that case we have no record of how much is already in use.
  */
 static int shmem_reconfigure(struct fs_context *fc)
 {
        struct shmem_options *ctx = fc->fs_private;
        struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
-       unsigned long inodes;
+       unsigned long used_isp;
        struct mempolicy *mpol = NULL;
        const char *err;
 
        raw_spin_lock(&sbinfo->stat_lock);
-       inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+       used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace;
 
        if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
                if (!sbinfo->max_blocks) {
@@ -3804,7 +4125,7 @@ static int shmem_reconfigure(struct fs_context *fc)
                        err = "Cannot retroactively limit inodes";
                        goto out;
                }
-               if (ctx->inodes < inodes) {
+               if (ctx->inodes * BOGO_INODE_SIZE < used_isp) {
                        err = "Too few inodes for current use";
                        goto out;
                }
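
Worked through: with nr_inodes=100 the pool starts at 100 * 1024 = 102400 bytes of ispace; if free_ispace has meanwhile dropped to 61440, used_isp is 40960, so a remount to nr_inodes=40 (40960 bytes) still passes the check above, while nr_inodes=39 (39936 bytes) trips "Too few inodes for current use".
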
@@ -3824,6 +4145,24 @@ static int shmem_reconfigure(struct fs_context *fc)
                goto out;
        }
 
+       if (ctx->seen & SHMEM_SEEN_QUOTA &&
+           !sb_any_quota_loaded(fc->root->d_sb)) {
+               err = "Cannot enable quota on remount";
+               goto out;
+       }
+
+#ifdef CONFIG_TMPFS_QUOTA
+#define CHANGED_LIMIT(name)                                            \
+       (ctx->qlimits.name## hardlimit &&                               \
+       (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit))
+
+       if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) ||
+           CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) {
+               err = "Cannot change global quota limit on remount";
+               goto out;
+       }
+#endif /* CONFIG_TMPFS_QUOTA */
+
        if (ctx->seen & SHMEM_SEEN_HUGE)
                sbinfo->huge = ctx->huge;
        if (ctx->seen & SHMEM_SEEN_INUMS)
@@ -3832,7 +4171,7 @@ static int shmem_reconfigure(struct fs_context *fc)
                sbinfo->max_blocks  = ctx->blocks;
        if (ctx->seen & SHMEM_SEEN_INODES) {
                sbinfo->max_inodes  = ctx->inodes;
-               sbinfo->free_inodes = ctx->inodes - inodes;
+               sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp;
        }
 
        /*
@@ -3915,6 +4254,9 @@ static void shmem_put_super(struct super_block *sb)
 {
        struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 
+#ifdef CONFIG_TMPFS_QUOTA
+       shmem_disable_quotas(sb);
+#endif
        free_percpu(sbinfo->ino_batch);
        percpu_counter_destroy(&sbinfo->used_blocks);
        mpol_put(sbinfo->mpol);
@@ -3927,12 +4269,13 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
        struct shmem_options *ctx = fc->fs_private;
        struct inode *inode;
        struct shmem_sb_info *sbinfo;
+       int error = -ENOMEM;
 
        /* Round up to L1_CACHE_BYTES to resist false sharing */
        sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
                                L1_CACHE_BYTES), GFP_KERNEL);
        if (!sbinfo)
-               return -ENOMEM;
+               return error;
 
        sb->s_fs_info = sbinfo;
 
@@ -3959,7 +4302,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
        sb->s_flags |= SB_NOUSER;
 #endif
        sbinfo->max_blocks = ctx->blocks;
-       sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+       sbinfo->max_inodes = ctx->inodes;
+       sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
        if (sb->s_flags & SB_KERNMOUNT) {
                sbinfo->ino_batch = alloc_percpu(ino_t);
                if (!sbinfo->ino_batch)
@@ -3993,10 +4337,27 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 #endif
        uuid_gen(&sb->s_uuid);
 
+#ifdef CONFIG_TMPFS_QUOTA
+       if (ctx->seen & SHMEM_SEEN_QUOTA) {
+               sb->dq_op = &shmem_quota_operations;
+               sb->s_qcop = &dquot_quotactl_sysfile_ops;
+               sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+
+               /* Copy the default limits from ctx into sbinfo */
+               memcpy(&sbinfo->qlimits, &ctx->qlimits,
+                      sizeof(struct shmem_quota_limits));
+
+               if (shmem_enable_quotas(sb, ctx->quota_types))
+                       goto failed;
+       }
+#endif /* CONFIG_TMPFS_QUOTA */
+
        inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
                                VM_NORESERVE);
-       if (!inode)
+       if (IS_ERR(inode)) {
+               error = PTR_ERR(inode);
                goto failed;
+       }
        inode->i_uid = sbinfo->uid;
        inode->i_gid = sbinfo->gid;
        sb->s_root = d_make_root(inode);
@@ -4006,7 +4367,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
 
 failed:
        shmem_put_super(sb);
-       return -ENOMEM;
+       return error;
 }
 
 static int shmem_get_tree(struct fs_context *fc)
@@ -4056,6 +4417,8 @@ static void shmem_destroy_inode(struct inode *inode)
 {
        if (S_ISREG(inode->i_mode))
                mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+       if (S_ISDIR(inode->i_mode))
+               simple_offset_destroy(shmem_get_offset_ctx(inode));
 }
 
 static void shmem_init_inode(void *foo)
@@ -4099,12 +4462,12 @@ EXPORT_SYMBOL(shmem_aops);
 
 static const struct file_operations shmem_file_operations = {
        .mmap           = shmem_mmap,
-       .open           = generic_file_open,
+       .open           = shmem_file_open,
        .get_unmapped_area = shmem_get_unmapped_area,
 #ifdef CONFIG_TMPFS
        .llseek         = shmem_file_llseek,
        .read_iter      = shmem_file_read_iter,
-       .write_iter     = generic_file_write_iter,
+       .write_iter     = shmem_file_write_iter,
        .fsync          = noop_fsync,
        .splice_read    = shmem_file_splice_read,
        .splice_write   = iter_file_splice_write,
@@ -4136,6 +4499,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
        .mknod          = shmem_mknod,
        .rename         = shmem_rename2,
        .tmpfile        = shmem_tmpfile,
+       .get_offset_ctx = shmem_get_offset_ctx,
 #endif
 #ifdef CONFIG_TMPFS_XATTR
        .listxattr      = shmem_listxattr,
@@ -4167,6 +4531,9 @@ static const struct super_operations shmem_ops = {
        .statfs         = shmem_statfs,
        .show_options   = shmem_show_options,
 #endif
+#ifdef CONFIG_TMPFS_QUOTA
+       .get_dquots     = shmem_get_dquots,
+#endif
        .evict_inode    = shmem_evict_inode,
        .drop_inode     = generic_delete_inode,
        .put_super      = shmem_put_super,
@@ -4220,7 +4587,7 @@ static struct file_system_type shmem_fs_type = {
 #endif
        .kill_sb        = kill_litter_super,
 #ifdef CONFIG_SHMEM
-       .fs_flags       = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+       .fs_flags       = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME,
 #else
        .fs_flags       = FS_USERNS_MOUNT,
 #endif
@@ -4232,6 +4599,14 @@ void __init shmem_init(void)
 
        shmem_init_inodecache();
 
+#ifdef CONFIG_TMPFS_QUOTA
+       error = register_quota_format(&shmem_quota_format);
+       if (error < 0) {
+               pr_err("Could not register quota format\n");
+               goto out3;
+       }
+#endif
+
        error = register_filesystem(&shmem_fs_type);
        if (error) {
                pr_err("Could not register tmpfs\n");
@@ -4256,6 +4631,10 @@ void __init shmem_init(void)
 out1:
        unregister_filesystem(&shmem_fs_type);
 out2:
+#ifdef CONFIG_TMPFS_QUOTA
+       unregister_quota_format(&shmem_quota_format);
+out3:
+#endif
        shmem_destroy_inodecache();
        shm_mnt = ERR_PTR(error);
 }
@@ -4375,10 +4754,16 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
 #define shmem_vm_ops                           generic_file_vm_ops
 #define shmem_anon_vm_ops                      generic_file_vm_ops
 #define shmem_file_operations                  ramfs_file_operations
-#define shmem_get_inode(idmap, sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)           0
 #define shmem_unacct_size(flags, size)         do {} while (0)
 
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir,
+                                           umode_t mode, dev_t dev, unsigned long flags)
+{
+       struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
+       return inode ? inode : ERR_PTR(-ENOSPC);
+}
+
 #endif /* CONFIG_SHMEM */
 
 /* common code */
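With ramfs_get_inode() still returning NULL on failure, the !CONFIG_SHMEM wrapper above translates that into the ERR_PTR convention the rest of the series now expects, so callers receive a precise errno in the pointer itself. For readers unfamiliar with the idiom, a minimal user-space rendering (the kernel's real helpers live in include/linux/err.h):

#include <errno.h>
#include <stdio.h>

/* User-space sketch of the ERR_PTR idiom: errors are encoded in the
 * top page of the address space, so one pointer carries either a valid
 * object or a negative errno, and callers test IS_ERR() instead of
 * comparing against NULL.
 */
#define MAX_ERRNO	4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *get_object(int fail)
{
	if (fail)
		return ERR_PTR(-ENOSPC);
	return "ok";
}

int main(void)
{
	void *obj = get_object(1);

	if (IS_ERR(obj))
		printf("error: %ld\n", PTR_ERR(obj));	/* -28 (ENOSPC) */
	return 0;
}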
@@ -4403,9 +4788,10 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
 
        inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
                                S_IFREG | S_IRWXUGO, 0, flags);
-       if (unlikely(!inode)) {
+
+       if (IS_ERR(inode)) {
                shmem_unacct_size(flags, size);
-               return ERR_PTR(-ENOSPC);
+               return ERR_CAST(inode);
        }
        inode->i_flags |= i_flags;
        inode->i_size = size;
diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c
new file mode 100644
index 0000000..062d1c1
--- /dev/null
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The in-memory quota format relies on the quota infrastructure to
+ * store dquot information for us. While conventional quota formats for
+ * file systems with persistent storage can load quota information into
+ * a dquot on demand, and hence the dquot shrinker can free any dquot
+ * that is not currently being used, that must be avoided here.
+ * Otherwise we could lose valuable information, namely the
+ * user-provided limits, because there is no persistent storage to
+ * reload the information from afterwards.
+ *
+ * One piece of information the in-memory quota format needs to keep
+ * track of is a sorted list of ids for each quota type. This is done
+ * by utilizing an rb tree whose root is stored in mem_dqinfo->dqi_priv
+ * for each quota type.
+ *
+ * This format can be used to support quota on file systems without
+ * persistent storage, such as tmpfs.
+ *
+ * Author:     Lukas Czerner <lczerner@redhat.com>
+ *             Carlos Maiolino <cmaiolino@redhat.com>
+ *
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/shmem_fs.h>
+
+#include <linux/quotaops.h>
+#include <linux/quota.h>
+
+#ifdef CONFIG_TMPFS_QUOTA
+
+/*
+ * The following constants define the amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure). The timer is started when the user crosses
+ * their soft limit and is reset when they drop back below it.
+ */
+#define SHMEM_MAX_IQ_TIME 604800       /* (7*24*60*60) 1 week */
+#define SHMEM_MAX_DQ_TIME 604800       /* (7*24*60*60) 1 week */
+
+struct quota_id {
+       struct rb_node  node;
+       qid_t           id;
+       qsize_t         bhardlimit;
+       qsize_t         bsoftlimit;
+       qsize_t         ihardlimit;
+       qsize_t         isoftlimit;
+};
+
+static int shmem_check_quota_file(struct super_block *sb, int type)
+{
+       /* There is no real quota file, nothing to do */
+       return 1;
+}
+
+/*
+ * There is no real quota file. Just allocate rb_root for quota ids and
+ * set limits
+ */
+static int shmem_read_file_info(struct super_block *sb, int type)
+{
+       struct quota_info *dqopt = sb_dqopt(sb);
+       struct mem_dqinfo *info = &dqopt->info[type];
+
+       info->dqi_priv = kzalloc(sizeof(struct rb_root), GFP_NOFS);
+       if (!info->dqi_priv)
+               return -ENOMEM;
+
+       info->dqi_max_spc_limit = SHMEM_QUOTA_MAX_SPC_LIMIT;
+       info->dqi_max_ino_limit = SHMEM_QUOTA_MAX_INO_LIMIT;
+
+       info->dqi_bgrace = SHMEM_MAX_DQ_TIME;
+       info->dqi_igrace = SHMEM_MAX_IQ_TIME;
+       info->dqi_flags = 0;
+
+       return 0;
+}
+
+static int shmem_write_file_info(struct super_block *sb, int type)
+{
+       /* There is no real quota file, nothing to do */
+       return 0;
+}
+
+/*
+ * Free all the quota_id entries in the rb tree and rb_root.
+ */
+static int shmem_free_file_info(struct super_block *sb, int type)
+{
+       struct mem_dqinfo *info = &sb_dqopt(sb)->info[type];
+       struct rb_root *root = info->dqi_priv;
+       struct quota_id *entry;
+       struct rb_node *node;
+
+       info->dqi_priv = NULL;
+       node = rb_first(root);
+       while (node) {
+               entry = rb_entry(node, struct quota_id, node);
+               node = rb_next(&entry->node);
+
+               rb_erase(&entry->node, root);
+               kfree(entry);
+       }
+
+       kfree(root);
+       return 0;
+}
+
+static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+       struct mem_dqinfo *info = sb_dqinfo(sb, qid->type);
+       struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+       qid_t id = from_kqid(&init_user_ns, *qid);
+       struct quota_info *dqopt = sb_dqopt(sb);
+       struct quota_id *entry = NULL;
+       int ret = 0;
+
+       if (!sb_has_quota_active(sb, qid->type))
+               return -ESRCH;
+
+       down_read(&dqopt->dqio_sem);
+       while (node) {
+               entry = rb_entry(node, struct quota_id, node);
+
+               if (id < entry->id)
+                       node = node->rb_left;
+               else if (id > entry->id)
+                       node = node->rb_right;
+               else
+                       goto got_next_id;
+       }
+
+       if (!entry) {
+               ret = -ENOENT;
+               goto out_unlock;
+       }
+
+       if (id > entry->id) {
+               node = rb_next(&entry->node);
+               if (!node) {
+                       ret = -ENOENT;
+                       goto out_unlock;
+               }
+               entry = rb_entry(node, struct quota_id, node);
+       }
+
+got_next_id:
+       *qid = make_kqid(&init_user_ns, qid->type, entry->id);
+out_unlock:
+       up_read(&dqopt->dqio_sem);
+       return ret;
+}
+
+/*
+ * Load dquot with limits from existing entry, or create the new entry if
+ * it does not exist.
+ */
+static int shmem_acquire_dquot(struct dquot *dquot)
+{
+       struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
+       struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+       struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
+       struct rb_node *parent = NULL, *new_node = NULL;
+       struct quota_id *new_entry, *entry;
+       qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
+       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+       int ret = 0;
+
+       mutex_lock(&dquot->dq_lock);
+
+       down_write(&dqopt->dqio_sem);
+       while (*n) {
+               parent = *n;
+               entry = rb_entry(parent, struct quota_id, node);
+
+               if (id < entry->id)
+                       n = &(*n)->rb_left;
+               else if (id > entry->id)
+                       n = &(*n)->rb_right;
+               else
+                       goto found;
+       }
+
+       /* We don't have an entry for this id yet; create it */
+       new_entry = kzalloc(sizeof(struct quota_id), GFP_NOFS);
+       if (!new_entry) {
+               ret = -ENOMEM;
+               goto out_unlock;
+       }
+
+       new_entry->id = id;
+       if (dquot->dq_id.type == USRQUOTA) {
+               new_entry->bhardlimit = sbinfo->qlimits.usrquota_bhardlimit;
+               new_entry->ihardlimit = sbinfo->qlimits.usrquota_ihardlimit;
+       } else if (dquot->dq_id.type == GRPQUOTA) {
+               new_entry->bhardlimit = sbinfo->qlimits.grpquota_bhardlimit;
+               new_entry->ihardlimit = sbinfo->qlimits.grpquota_ihardlimit;
+       }
+
+       new_node = &new_entry->node;
+       rb_link_node(new_node, parent, n);
+       rb_insert_color(new_node, (struct rb_root *)info->dqi_priv);
+       entry = new_entry;
+
+found:
+       /* Load the stored limits from the tree */
+       spin_lock(&dquot->dq_dqb_lock);
+       dquot->dq_dqb.dqb_bhardlimit = entry->bhardlimit;
+       dquot->dq_dqb.dqb_bsoftlimit = entry->bsoftlimit;
+       dquot->dq_dqb.dqb_ihardlimit = entry->ihardlimit;
+       dquot->dq_dqb.dqb_isoftlimit = entry->isoftlimit;
+
+       if (!dquot->dq_dqb.dqb_bhardlimit &&
+           !dquot->dq_dqb.dqb_bsoftlimit &&
+           !dquot->dq_dqb.dqb_ihardlimit &&
+           !dquot->dq_dqb.dqb_isoftlimit)
+               set_bit(DQ_FAKE_B, &dquot->dq_flags);
+       spin_unlock(&dquot->dq_dqb_lock);
+
+       /* Make sure flags update is visible after dquot has been filled */
+       smp_mb__before_atomic();
+       set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+out_unlock:
+       up_write(&dqopt->dqio_sem);
+       mutex_unlock(&dquot->dq_lock);
+       return ret;
+}
+
+static bool shmem_is_empty_dquot(struct dquot *dquot)
+{
+       struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
+       qsize_t bhardlimit;
+       qsize_t ihardlimit;
+
+       if (dquot->dq_id.type == USRQUOTA) {
+               bhardlimit = sbinfo->qlimits.usrquota_bhardlimit;
+               ihardlimit = sbinfo->qlimits.usrquota_ihardlimit;
+       } else if (dquot->dq_id.type == GRPQUOTA) {
+               bhardlimit = sbinfo->qlimits.grpquota_bhardlimit;
+               ihardlimit = sbinfo->qlimits.grpquota_ihardlimit;
+       }
+
+       if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+               (dquot->dq_dqb.dqb_curspace == 0 &&
+                dquot->dq_dqb.dqb_curinodes == 0 &&
+                dquot->dq_dqb.dqb_bhardlimit == bhardlimit &&
+                dquot->dq_dqb.dqb_ihardlimit == ihardlimit))
+               return true;
+
+       return false;
+}
+
+/*
+ * Store limits from dquot in the tree unless it's fake. If it is fake
+ * remove the id from the tree since there is no useful information in
+ * there.
+ */
+static int shmem_release_dquot(struct dquot *dquot)
+{
+       struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
+       struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+       qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
+       struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+       struct quota_id *entry = NULL;
+
+       mutex_lock(&dquot->dq_lock);
+       /* Check that we are not racing with some other dqget() */
+       if (dquot_is_busy(dquot))
+               goto out_dqlock;
+
+       down_write(&dqopt->dqio_sem);
+       while (node) {
+               entry = rb_entry(node, struct quota_id, node);
+
+               if (id < entry->id)
+                       node = node->rb_left;
+               else if (id > entry->id)
+                       node = node->rb_right;
+               else
+                       goto found;
+       }
+
+       /* We should always find the entry in the rb tree */
+       WARN_ONCE(1, "quota id %u from dquot %p, not in rb tree!\n", id, dquot);
+       up_write(&dqopt->dqio_sem);
+       mutex_unlock(&dquot->dq_lock);
+       return -ENOENT;
+
+found:
+       if (shmem_is_empty_dquot(dquot)) {
+               /* Remove entry from the tree */
+               rb_erase(&entry->node, info->dqi_priv);
+               kfree(entry);
+       } else {
+               /* Store the limits in the tree */
+               spin_lock(&dquot->dq_dqb_lock);
+               entry->bhardlimit = dquot->dq_dqb.dqb_bhardlimit;
+               entry->bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit;
+               entry->ihardlimit = dquot->dq_dqb.dqb_ihardlimit;
+               entry->isoftlimit = dquot->dq_dqb.dqb_isoftlimit;
+               spin_unlock(&dquot->dq_dqb_lock);
+       }
+
+       clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+       up_write(&dqopt->dqio_sem);
+
+out_dqlock:
+       mutex_unlock(&dquot->dq_lock);
+       return 0;
+}
+
+static int shmem_mark_dquot_dirty(struct dquot *dquot)
+{
+       return 0;
+}
+
+static int shmem_dquot_write_info(struct super_block *sb, int type)
+{
+       return 0;
+}
+
+static const struct quota_format_ops shmem_format_ops = {
+       .check_quota_file       = shmem_check_quota_file,
+       .read_file_info         = shmem_read_file_info,
+       .write_file_info        = shmem_write_file_info,
+       .free_file_info         = shmem_free_file_info,
+};
+
+struct quota_format_type shmem_quota_format = {
+       .qf_fmt_id = QFMT_SHMEM,
+       .qf_ops = &shmem_format_ops,
+       .qf_owner = THIS_MODULE
+};
+
+const struct dquot_operations shmem_quota_operations = {
+       .acquire_dquot          = shmem_acquire_dquot,
+       .release_dquot          = shmem_release_dquot,
+       .alloc_dquot            = dquot_alloc,
+       .destroy_dquot          = dquot_destroy,
+       .write_info             = shmem_dquot_write_info,
+       .mark_dirty             = shmem_mark_dquot_dirty,
+       .get_next_id            = shmem_get_next_id,
+};
+#endif /* CONFIG_TMPFS_QUOTA */
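Because nothing backs this format on disk, the rb tree above is the only place user-set limits survive once a dquot is released, which is why shmem_release_dquot() writes them back instead of discarding them. A compressed user-space sketch of that store, with a plain unbalanced BST standing in for the kernel's rb tree (names are illustrative, not kernel API):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative only: entries persist across "dquot" lifetimes, which
 * is the whole point of the in-memory format; there is no quota file
 * to reload limits from.
 */
struct quota_id {
	struct quota_id *left, *right;
	unsigned int id;
	unsigned long long bhardlimit, ihardlimit;
};

static struct quota_id *find_or_create(struct quota_id **root, unsigned int id)
{
	while (*root) {
		if (id < (*root)->id)
			root = &(*root)->left;
		else if (id > (*root)->id)
			root = &(*root)->right;
		else
			return *root;
	}
	*root = calloc(1, sizeof(**root));
	if (*root)
		(*root)->id = id;
	return *root;
}

int main(void)
{
	struct quota_id *root = NULL, *e;

	e = find_or_create(&root, 1000);	/* "acquire": create entry */
	e->bhardlimit = 1 << 20;		/* "release": store limits */

	e = find_or_create(&root, 1000);	/* later acquire finds them */
	printf("uid 1000 bhardlimit: %llu\n", e->bhardlimit);
	return 0;
}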
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8e6dde6..b15112b 100644
@@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        struct page *swapcache;
        spinlock_t *ptl;
        pte_t *pte, new_pte, old_pte;
-       bool hwposioned = false;
+       bool hwpoisoned = PageHWPoison(page);
        int ret = 1;
 
        swapcache = page;
@@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
        if (unlikely(!page))
                return -ENOMEM;
        else if (unlikely(PTR_ERR(page) == -EHWPOISON))
-               hwposioned = true;
+               hwpoisoned = true;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
@@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 
        old_pte = ptep_get(pte);
 
-       if (unlikely(hwposioned || !PageUptodate(page))) {
+       if (unlikely(hwpoisoned || !PageUptodate(page))) {
                swp_entry_t swp_entry;
 
                dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-               if (hwposioned) {
+               if (hwpoisoned) {
                        swp_entry = make_hwpoison_entry(swapcache);
                        page = swapcache;
                } else {
diff --git a/mm/truncate.c b/mm/truncate.c
index 95d1291..c3320e6 100644
@@ -657,11 +657,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
                        }
 
                        folio_lock(folio);
-                       VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
-                       if (folio->mapping != mapping) {
+                       if (unlikely(folio->mapping != mapping)) {
                                folio_unlock(folio);
                                continue;
                        }
+                       VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
                        folio_wait_writeback(folio);
 
                        if (folio_mapped(folio))
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99a..228a4a5 100644
@@ -2979,6 +2979,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
                free_vm_area(area);
                return NULL;
        }
+
+       flush_cache_vmap((unsigned long)area->addr,
+                        (unsigned long)area->addr + count * PAGE_SIZE);
+
        return area->addr;
 }
 EXPORT_SYMBOL_GPL(vmap_pfn);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1080209..2fe4a11 100644
@@ -4284,6 +4284,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
        static const struct mm_walk_ops mm_walk_ops = {
                .test_walk = should_skip_vma,
                .p4d_entry = walk_pud_range,
+               .walk_lock = PGWALK_RDLOCK,
        };
 
        int err;
@@ -4853,16 +4854,17 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 
                spin_lock_irq(&pgdat->memcg_lru.lock);
 
-               VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+               if (hlist_nulls_unhashed(&lruvec->lrugen.list))
+                       goto unlock;
 
                gen = lruvec->lrugen.gen;
 
-               hlist_nulls_del_rcu(&lruvec->lrugen.list);
+               hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
                pgdat->memcg_lru.nr_memcgs[gen]--;
 
                if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
                        WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
+unlock:
                spin_unlock_irq(&pgdat->memcg_lru.lock);
        }
 }
@@ -5434,8 +5436,10 @@ restart:
        rcu_read_lock();
 
        hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
-               if (op)
+               if (op) {
                        lru_gen_rotate_memcg(lruvec, op);
+                       op = 0;
+               }
 
                mem_cgroup_put(memcg);
 
@@ -5443,7 +5447,7 @@ restart:
                memcg = lruvec_memcg(lruvec);
 
                if (!mem_cgroup_tryget(memcg)) {
-                       op = 0;
+                       lru_gen_release_memcg(memcg);
                        memcg = NULL;
                        continue;
                }
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 3f05797..32916d2 100644
@@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 
 static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 {
+       struct zs_pool *pool;
        struct zspage *zspage;
 
        /*
@@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
        VM_BUG_ON_PAGE(PageIsolated(page), page);
 
        zspage = get_zspage(page);
-       migrate_write_lock(zspage);
+       pool = zspage->pool;
+       spin_lock(&pool->lock);
        inc_zspage_isolation(zspage);
-       migrate_write_unlock(zspage);
+       spin_unlock(&pool->lock);
 
        return true;
 }
@@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
        kunmap_atomic(s_addr);
 
        replace_sub_page(class, zspage, newpage, page);
+       dec_zspage_isolation(zspage);
        /*
         * Since we complete the data copy and set up new zspage structure,
         * it's okay to release the pool's lock.
         */
        spin_unlock(&pool->lock);
-       dec_zspage_isolation(zspage);
        migrate_write_unlock(zspage);
 
        get_page(newpage);
@@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 
 static void zs_page_putback(struct page *page)
 {
+       struct zs_pool *pool;
        struct zspage *zspage;
 
        VM_BUG_ON_PAGE(!PageIsolated(page), page);
 
        zspage = get_zspage(page);
-       migrate_write_lock(zspage);
+       pool = zspage->pool;
+       spin_lock(&pool->lock);
        dec_zspage_isolation(zspage);
-       migrate_write_unlock(zspage);
+       spin_unlock(&pool->lock);
 }
 
 static const struct movable_operations zsmalloc_mops = {
diff --git a/net/9p/client.c b/net/9p/client.c
index a334026..86bbc71 100644
@@ -904,7 +904,7 @@ EXPORT_SYMBOL(do_trace_9p_fid_put);
 
 static int p9_client_version(struct p9_client *c)
 {
-       int err = 0;
+       int err;
        struct p9_req_t *req;
        char *version = NULL;
        int msize;
@@ -975,7 +975,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
        struct p9_client *clnt;
        char *client_id;
 
-       err = 0;
        clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
        if (!clnt)
                return ERR_PTR(-ENOMEM);
@@ -1094,7 +1093,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
                                const char *uname, kuid_t n_uname,
                                const char *aname)
 {
-       int err = 0;
+       int err;
        struct p9_req_t *req;
        struct p9_fid *fid;
        struct p9_qid qid;
@@ -1147,7 +1146,6 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
        struct p9_req_t *req;
        u16 nwqids, count;
 
-       err = 0;
        wqids = NULL;
        clnt = oldfid->clnt;
        if (clone) {
@@ -1224,7 +1222,6 @@ int p9_client_open(struct p9_fid *fid, int mode)
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
                 p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
-       err = 0;
 
        if (fid->mode != -1)
                return -EINVAL;
@@ -1262,7 +1259,7 @@ EXPORT_SYMBOL(p9_client_open);
 int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
                          u32 mode, kgid_t gid, struct p9_qid *qid)
 {
-       int err = 0;
+       int err;
        struct p9_client *clnt;
        struct p9_req_t *req;
        int iounit;
@@ -1314,7 +1311,6 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
 
        p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
                 fid->fid, name, perm, mode);
-       err = 0;
        clnt = fid->clnt;
 
        if (fid->mode != -1)
@@ -1350,7 +1346,7 @@ EXPORT_SYMBOL(p9_client_fcreate);
 int p9_client_symlink(struct p9_fid *dfid, const char *name,
                      const char *symtgt, kgid_t gid, struct p9_qid *qid)
 {
-       int err = 0;
+       int err;
        struct p9_client *clnt;
        struct p9_req_t *req;
 
@@ -1402,13 +1398,12 @@ EXPORT_SYMBOL(p9_client_link);
 
 int p9_client_fsync(struct p9_fid *fid, int datasync)
 {
-       int err;
+       int err = 0;
        struct p9_client *clnt;
        struct p9_req_t *req;
 
        p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
                 fid->fid, datasync);
-       err = 0;
        clnt = fid->clnt;
 
        req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
@@ -1428,7 +1423,7 @@ EXPORT_SYMBOL(p9_client_fsync);
 
 int p9_client_clunk(struct p9_fid *fid)
 {
-       int err;
+       int err = 0;
        struct p9_client *clnt;
        struct p9_req_t *req;
        int retries = 0;
@@ -1436,7 +1431,6 @@ int p9_client_clunk(struct p9_fid *fid)
 again:
        p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
                 fid->fid, retries);
-       err = 0;
        clnt = fid->clnt;
 
        req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
@@ -1465,12 +1459,11 @@ EXPORT_SYMBOL(p9_client_clunk);
 
 int p9_client_remove(struct p9_fid *fid)
 {
-       int err;
+       int err = 0;
        struct p9_client *clnt;
        struct p9_req_t *req;
 
        p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
-       err = 0;
        clnt = fid->clnt;
 
        req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
@@ -1680,7 +1673,6 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
        if (!ret)
                return ERR_PTR(-ENOMEM);
 
-       err = 0;
        clnt = fid->clnt;
 
        req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
@@ -1733,7 +1725,6 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
        if (!ret)
                return ERR_PTR(-ENOMEM);
 
-       err = 0;
        clnt = fid->clnt;
 
        req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
@@ -1812,11 +1803,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
 
 int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
 {
-       int err;
+       int err = 0;
        struct p9_req_t *req;
        struct p9_client *clnt;
 
-       err = 0;
        clnt = fid->clnt;
        wst->size = p9_client_statsize(wst, clnt->proto_version);
        p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
@@ -1851,11 +1841,10 @@ EXPORT_SYMBOL(p9_client_wstat);
 
 int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
 {
-       int err;
+       int err = 0;
        struct p9_req_t *req;
        struct p9_client *clnt;
 
-       err = 0;
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
        p9_debug(P9_DEBUG_9P, "    valid=%x mode=%x uid=%d gid=%d size=%lld\n",
@@ -1887,7 +1876,6 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
        struct p9_req_t *req;
        struct p9_client *clnt;
 
-       err = 0;
        clnt = fid->clnt;
 
        p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
@@ -1921,11 +1909,10 @@ EXPORT_SYMBOL(p9_client_statfs);
 int p9_client_rename(struct p9_fid *fid,
                     struct p9_fid *newdirfid, const char *name)
 {
-       int err;
+       int err = 0;
        struct p9_req_t *req;
        struct p9_client *clnt;
 
-       err = 0;
        clnt = fid->clnt;
 
        p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
@@ -1949,11 +1936,10 @@ EXPORT_SYMBOL(p9_client_rename);
 int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
                       struct p9_fid *newdirfid, const char *new_name)
 {
-       int err;
+       int err = 0;
        struct p9_req_t *req;
        struct p9_client *clnt;
 
-       err = 0;
        clnt = olddirfid->clnt;
 
        p9_debug(P9_DEBUG_9P,
@@ -1986,7 +1972,6 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
        struct p9_client *clnt;
        struct p9_fid *attr_fid;
 
-       err = 0;
        clnt = file_fid->clnt;
        attr_fid = p9_fid_create(clnt);
        if (!attr_fid) {
@@ -2027,14 +2012,13 @@ EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
 int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
                          u64 attr_size, int flags)
 {
-       int err;
+       int err = 0;
        struct p9_req_t *req;
        struct p9_client *clnt;
 
        p9_debug(P9_DEBUG_9P,
                 ">>> TXATTRCREATE fid %d name  %s size %llu flag %d\n",
                 fid->fid, name, attr_size, flags);
-       err = 0;
        clnt = fid->clnt;
        req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
                            fid->fid, name, attr_size, flags);
@@ -2063,7 +2047,6 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
        p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
                 fid->fid, offset, count);
 
-       err = 0;
        clnt = fid->clnt;
 
        rsize = fid->iounit;
@@ -2122,7 +2105,6 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
        struct p9_client *clnt;
        struct p9_req_t *req;
 
-       err = 0;
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P,
                 ">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
@@ -2153,7 +2135,6 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
        struct p9_client *clnt;
        struct p9_req_t *req;
 
-       err = 0;
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
                 fid->fid, name, mode, from_kgid(&init_user_ns, gid));
@@ -2182,7 +2163,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
        struct p9_client *clnt;
        struct p9_req_t *req;
 
-       err = 0;
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P,
                 ">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
@@ -2214,7 +2194,6 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
        struct p9_client *clnt;
        struct p9_req_t *req;
 
-       err = 0;
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P,
                 ">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
@@ -2251,7 +2230,6 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
        struct p9_client *clnt;
        struct p9_req_t *req;
 
-       err = 0;
        clnt = fid->clnt;
        p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
 
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb..e305071 100644
@@ -384,7 +384,7 @@ static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
        void *to = req->rc.sdata + in_hdr_len;
 
        // Fits entirely into the static data?  Nothing to do.
-       if (req->rc.size < in_hdr_len)
+       if (req->rc.size < in_hdr_len || !pages)
                return;
 
        // Really long error message?  Tough, truncate the reply.  Might get
@@ -428,7 +428,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
        struct page **in_pages = NULL, **out_pages = NULL;
        struct virtio_chan *chan = client->trans;
        struct scatterlist *sgs[4];
-       size_t offs;
+       size_t offs = 0;
        int need_drop = 0;
        int kicked = 0;
 
@@ -501,8 +501,8 @@ req_retry_pinned:
 
        if (in_pages) {
                sgs[out_sgs + in_sgs++] = chan->sg + out + in;
-               in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
-                                    in_pages, in_nr_pages, offs, inlen);
+               pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+                              in_pages, in_nr_pages, offs, inlen);
        }
 
        BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index acff565..1d70457 100644
@@ -505,7 +505,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
        struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
        struct batadv_elp_packet *elp_packet;
        struct batadv_hard_iface *primary_if;
-       struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+       struct ethhdr *ethhdr;
        bool res;
        int ret = NET_RX_DROP;
 
@@ -513,6 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
        if (!res)
                goto free_skb;
 
+       ethhdr = eth_hdr(skb);
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
                goto free_skb;
 
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e710e9a..e503ee0 100644
@@ -123,8 +123,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
 {
        struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
 
-       if (hard_iface->if_status != BATADV_IF_ACTIVE)
+       if (hard_iface->if_status != BATADV_IF_ACTIVE) {
+               kfree_skb(skb);
                return;
+       }
 
        batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
        batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
@@ -985,7 +987,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
 {
        struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
        struct batadv_ogm2_packet *ogm_packet;
-       struct ethhdr *ethhdr = eth_hdr(skb);
+       struct ethhdr *ethhdr;
        int ogm_offset;
        u8 *packet_pos;
        int ret = NET_RX_DROP;
@@ -999,6 +1001,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
        if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
                goto free_skb;
 
+       ethhdr = eth_hdr(skb);
        if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
                goto free_skb;
 
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 41c1ad3..24c9c0c 100644
@@ -630,7 +630,19 @@ out:
  */
 void batadv_update_min_mtu(struct net_device *soft_iface)
 {
-       soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
+       struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+       int limit_mtu;
+       int mtu;
+
+       mtu = batadv_hardif_min_mtu(soft_iface);
+
+       if (bat_priv->mtu_set_by_user)
+               limit_mtu = bat_priv->mtu_set_by_user;
+       else
+               limit_mtu = ETH_DATA_LEN;
+
+       mtu = min(mtu, limit_mtu);
+       dev_set_mtu(soft_iface, mtu);
 
        /* Check if the local translate table should be cleaned up to match a
         * new (and smaller) MTU.
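The rewritten batadv_update_min_mtu() clamps the computed hard-interface minimum against either a user-configured MTU or the Ethernet default. The clamp in isolation, as a standalone sketch (ETH_DATA_LEN is 1500):

#include <stdio.h>

#define ETH_DATA_LEN 1500	/* standard Ethernet payload size */

/* Hypothetical standalone rendering of the clamp above: prefer a
 * user-set MTU as the upper bound, otherwise fall back to ETH_DATA_LEN.
 */
static int clamp_min_mtu(int hardif_min_mtu, int mtu_set_by_user)
{
	int limit = mtu_set_by_user ? mtu_set_by_user : ETH_DATA_LEN;

	return hardif_min_mtu < limit ? hardif_min_mtu : limit;
}

int main(void)
{
	printf("%d\n", clamp_min_mtu(1532, 0));		/* -> 1500 */
	printf("%d\n", clamp_min_mtu(1400, 1300));	/* -> 1300 */
	return 0;
}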
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ad5714f..6efbc92 100644
@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
                attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
 
                atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+
+               rtnl_lock();
                batadv_update_min_mtu(bat_priv->soft_iface);
+               rtnl_unlock();
        }
 
        if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d3fdf82..85d00dc 100644
@@ -153,11 +153,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 
 static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
 {
+       struct batadv_priv *bat_priv = netdev_priv(dev);
+
        /* check ranges */
        if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
                return -EINVAL;
 
        dev->mtu = new_mtu;
+       bat_priv->mtu_set_by_user = new_mtu;
 
        return 0;
 }
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 36ca312..b95c367 100644
@@ -774,7 +774,6 @@ check_roaming:
                if (roamed_back) {
                        batadv_tt_global_free(bat_priv, tt_global,
                                              "Roaming canceled");
-                       tt_global = NULL;
                } else {
                        /* The global entry has to be marked as ROAMING and
                         * has to be kept for consistency purpose
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ca9449e..cf1a0ea 100644
@@ -1547,6 +1547,12 @@ struct batadv_priv {
        struct net_device *soft_iface;
 
        /**
+        * @mtu_set_by_user: MTU set once by the user (0 if never set);
+        * protected by rtnl_lock
+        */
+       int mtu_set_by_user;
+
+       /**
         * @bat_counters: mesh internal traffic statistic counters (see
         *  batadv_counters)
         */
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 056f951..7622256 100644
@@ -118,7 +118,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
         */
        params->explicit_connect = false;
 
-       list_del_init(&params->action);
+       hci_pend_le_list_del_init(params);
 
        switch (params->auto_connect) {
        case HCI_AUTO_CONN_EXPLICIT:
@@ -127,10 +127,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
                return;
        case HCI_AUTO_CONN_DIRECT:
        case HCI_AUTO_CONN_ALWAYS:
-               list_add(&params->action, &hdev->pend_le_conns);
+               hci_pend_le_list_add(params, &hdev->pend_le_conns);
                break;
        case HCI_AUTO_CONN_REPORT:
-               list_add(&params->action, &hdev->pend_le_reports);
+               hci_pend_le_list_add(params, &hdev->pend_le_reports);
                break;
        default:
                break;
@@ -1426,8 +1426,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
        if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
            params->auto_connect == HCI_AUTO_CONN_REPORT ||
            params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
-               list_del_init(&params->action);
-               list_add(&params->action, &hdev->pend_le_conns);
+               hci_pend_le_list_del_init(params);
+               hci_pend_le_list_add(params, &hdev->pend_le_conns);
        }
 
        params->explicit_connect = true;
@@ -1684,7 +1684,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
        if (!link) {
                hci_conn_drop(acl);
                hci_conn_drop(sco);
-               return NULL;
+               return ERR_PTR(-ENOLINK);
        }
 
        sco->setting = setting;
@@ -2254,7 +2254,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
        if (!link) {
                hci_conn_drop(le);
                hci_conn_drop(cis);
-               return NULL;
+               return ERR_PTR(-ENOLINK);
        }
 
        /* If LE is already connected and CIS handle is already set proceed to
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 48917c6..1ec8398 100644
@@ -1972,6 +1972,7 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
                                  struct adv_monitor *monitor)
 {
        int status = 0;
+       int handle;
 
        switch (hci_get_adv_monitor_offload_ext(hdev)) {
        case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
@@ -1980,9 +1981,10 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
                goto free_monitor;
 
        case HCI_ADV_MONITOR_EXT_MSFT:
+               handle = monitor->handle;
                status = msft_remove_monitor(hdev, monitor);
                bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
-                          hdev->name, monitor->handle, status);
+                          hdev->name, handle, status);
                break;
        }
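The hci_remove_adv_monitor() change fixes a use-after-free: msft_remove_monitor() can free the monitor, so the handle has to be copied out before the call. The shape of that fix in a self-contained sketch (hypothetical names, not the Bluetooth API):

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical miniature of the bug class fixed above: copy out every
 * field still needed before calling a routine that may free the object,
 * then use only the saved copies.
 */
struct monitor { int handle; };

static int remove_backend(struct monitor *m)
{
	free(m);		/* backend may release the object */
	return 0;
}

int main(void)
{
	struct monitor *m = calloc(1, sizeof(*m));
	int handle, status;

	if (!m)
		return 1;
	m->handle = 42;

	handle = m->handle;		/* save before it can be freed */
	status = remove_backend(m);	/* m must not be touched after this */
	printf("removed monitor %d, status %d\n", handle, status);
	return 0;
}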
 
@@ -2249,22 +2251,46 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
        return NULL;
 }
 
-/* This function requires the caller holds hdev->lock */
+/* This function requires the caller holds hdev->lock or rcu_read_lock */
 struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
                                                  bdaddr_t *addr, u8 addr_type)
 {
        struct hci_conn_params *param;
 
-       list_for_each_entry(param, list, action) {
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(param, list, action) {
                if (bacmp(&param->addr, addr) == 0 &&
-                   param->addr_type == addr_type)
+                   param->addr_type == addr_type) {
+                       rcu_read_unlock();
                        return param;
+               }
        }
 
+       rcu_read_unlock();
+
        return NULL;
 }
 
 /* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_del_init(struct hci_conn_params *param)
+{
+       if (list_empty(&param->action))
+               return;
+
+       list_del_rcu(&param->action);
+       synchronize_rcu();
+       INIT_LIST_HEAD(&param->action);
+}
+
+/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_add(struct hci_conn_params *param,
+                         struct list_head *list)
+{
+       list_add_rcu(&param->action, list);
+}
+
+/* This function requires the caller holds hdev->lock */
 struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
                                            bdaddr_t *addr, u8 addr_type)
 {
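The new helpers above make the pend_le action lists safe for RCU readers: addition uses list_add_rcu(), and removal unlinks with list_del_rcu(), waits out readers with synchronize_rcu(), and only then re-initialises the node so a later list_empty() check stays meaningful. A user-space analogue of that del-init shape, with a pthread rwlock standing in for RCU and writer serialization assumed to come from a caller-held lock (hdev->lock in the kernel):

#include <pthread.h>
#include <stdio.h>

/* Sketch only: readers hold the read side of the rwlock; a brief
 * write-lock acquisition models synchronize_rcu() by waiting out every
 * pre-existing reader before the node is reused.
 */
static pthread_rwlock_t fake_rcu = PTHREAD_RWLOCK_INITIALIZER;

struct node { struct node *next, *prev; };

static void node_init(struct node *n) { n->next = n->prev = n; }
static int node_detached(const struct node *n) { return n->next == n; }

static void list_add_head(struct node *n, struct node *head)
{
	n->next = head->next;
	n->prev = head;
	head->next->prev = n;
	head->next = n;
}

static void list_del_init_rcu_style(struct node *n)
{
	if (node_detached(n))
		return;				/* not on any list */

	/* unlink: readers already on n may still follow n->next */
	n->prev->next = n->next;
	n->next->prev = n->prev;

	/* grace period: wait until no reader can still see n */
	pthread_rwlock_wrlock(&fake_rcu);
	pthread_rwlock_unlock(&fake_rcu);

	node_init(n);				/* now safe to reuse */
}

int main(void)
{
	struct node head, a;

	node_init(&head);
	node_init(&a);
	list_add_head(&a, &head);
	list_del_init_rcu_style(&a);
	printf("detached: %d\n", node_detached(&a));	/* 1 */
	return 0;
}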
@@ -2297,14 +2323,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
        return params;
 }
 
-static void hci_conn_params_free(struct hci_conn_params *params)
+void hci_conn_params_free(struct hci_conn_params *params)
 {
+       hci_pend_le_list_del_init(params);
+
        if (params->conn) {
                hci_conn_drop(params->conn);
                hci_conn_put(params->conn);
        }
 
-       list_del(&params->action);
        list_del(&params->list);
        kfree(params);
 }
@@ -2342,8 +2369,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
                        continue;
                }
 
-               list_del(&params->list);
-               kfree(params);
+               hci_conn_params_free(params);
        }
 
        BT_DBG("All LE disabled connection parameters were removed");
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 95816a9..31ca320 100644
@@ -1564,7 +1564,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data,
 
        params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type);
        if (params)
-               params->privacy_mode = cp->mode;
+               WRITE_ONCE(params->privacy_mode, cp->mode);
 
        hci_dev_unlock(hdev);
 
@@ -2784,6 +2784,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
                        hci_enable_advertising(hdev);
                }
 
+               /* Inform sockets conn is gone before we delete it */
+               hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED);
+
                goto done;
        }
 
@@ -2804,8 +2807,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
 
                case HCI_AUTO_CONN_DIRECT:
                case HCI_AUTO_CONN_ALWAYS:
-                       list_del_init(&params->action);
-                       list_add(&params->action, &hdev->pend_le_conns);
+                       hci_pend_le_list_del_init(params);
+                       hci_pend_le_list_add(params, &hdev->pend_le_conns);
                        break;
 
                default:
@@ -3423,8 +3426,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
 
                case HCI_AUTO_CONN_DIRECT:
                case HCI_AUTO_CONN_ALWAYS:
-                       list_del_init(&params->action);
-                       list_add(&params->action, &hdev->pend_le_conns);
+                       hci_pend_le_list_del_init(params);
+                       hci_pend_le_list_add(params, &hdev->pend_le_conns);
                        hci_update_passive_scan(hdev);
                        break;
 
@@ -5962,7 +5965,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
        params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
                                           conn->dst_type);
        if (params) {
-               list_del_init(&params->action);
+               hci_pend_le_list_del_init(params);
                if (params->conn) {
                        hci_conn_drop(params->conn);
                        hci_conn_put(params->conn);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8561616..4d1e32b 100644
@@ -2160,15 +2160,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev,
        return 0;
 }
 
+struct conn_params {
+       bdaddr_t addr;
+       u8 addr_type;
+       hci_conn_flags_t flags;
+       u8 privacy_mode;
+};
+
 /* Adds connection to resolve list if needed.
  * Setting params to NULL programs local hdev->irk
  */
 static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
-                                       struct hci_conn_params *params)
+                                       struct conn_params *params)
 {
        struct hci_cp_le_add_to_resolv_list cp;
        struct smp_irk *irk;
        struct bdaddr_list_with_irk *entry;
+       struct hci_conn_params *p;
 
        if (!use_ll_privacy(hdev))
                return 0;
@@ -2203,6 +2211,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
        /* Default privacy mode is always Network */
        params->privacy_mode = HCI_NETWORK_PRIVACY;
 
+       rcu_read_lock();
+       p = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+                                     &params->addr, params->addr_type);
+       if (!p)
+               p = hci_pend_le_action_lookup(&hdev->pend_le_reports,
+                                             &params->addr, params->addr_type);
+       if (p)
+               WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY);
+       rcu_read_unlock();
+
 done:
        if (hci_dev_test_flag(hdev, HCI_PRIVACY))
                memcpy(cp.local_irk, hdev->irk, 16);
@@ -2215,7 +2233,7 @@ done:
 
 /* Set Device Privacy Mode. */
 static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
-                                       struct hci_conn_params *params)
+                                       struct conn_params *params)
 {
        struct hci_cp_le_set_privacy_mode cp;
        struct smp_irk *irk;
@@ -2240,6 +2258,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
        bacpy(&cp.bdaddr, &irk->bdaddr);
        cp.mode = HCI_DEVICE_PRIVACY;
 
+       /* Note: params->privacy_mode is not updated since it is a copy */
+
        return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE,
                                     sizeof(cp), &cp, HCI_CMD_TIMEOUT);
 }
@@ -2249,7 +2269,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
  * properly set the privacy mode.
  */
 static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
-                                      struct hci_conn_params *params,
+                                      struct conn_params *params,
                                       u8 *num_entries)
 {
        struct hci_cp_le_add_to_accept_list cp;
@@ -2447,6 +2467,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
        return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk);
 }
 
+static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
+{
+       struct hci_conn_params *params;
+       struct conn_params *p;
+       size_t i;
+
+       rcu_read_lock();
+
+       i = 0;
+       list_for_each_entry_rcu(params, list, action)
+               ++i;
+       *n = i;
+
+       rcu_read_unlock();
+
+       p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL);
+       if (!p)
+               return NULL;
+
+       rcu_read_lock();
+
+       i = 0;
+       list_for_each_entry_rcu(params, list, action) {
+               /* Racing adds are handled in next scan update */
+               if (i >= *n)
+                       break;
+
+               /* No hdev->lock, but: addr, addr_type are immutable.
+                * privacy_mode is only written by us or in
+                * hci_cc_le_set_privacy_mode, which we wait for.
+                * We should be idempotent so MGMT updating flags
+                * while we are processing is OK.
+                */
+               bacpy(&p[i].addr, &params->addr);
+               p[i].addr_type = params->addr_type;
+               p[i].flags = READ_ONCE(params->flags);
+               p[i].privacy_mode = READ_ONCE(params->privacy_mode);
+               ++i;
+       }
+
+       rcu_read_unlock();
+
+       *n = i;
+       return p;
+}
+
 /* Device must not be scanning when updating the accept list.
  *
  * Update is done using the following sequence:
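conn_params_copy() exists because the accept-list update has to sleep waiting for controller events and so cannot stay inside an RCU read-side section throughout; it iterates a kvcalloc'd snapshot instead. The same two-pass shape as a standalone sketch, with a mutex standing in for the RCU read side (illustrative only):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch: count under the lock, allocate outside it, copy under the
 * lock again, and clamp the final count in case the list changed in
 * between. Racing additions are simply picked up by the next update,
 * exactly as the comment in conn_params_copy() says.
 */
struct item { struct item *next; int payload; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static int *snapshot(struct item *head, size_t *n)
{
	struct item *it;
	size_t i = 0;
	int *copy;

	pthread_mutex_lock(&list_lock);
	for (it = head; it; it = it->next)
		i++;
	pthread_mutex_unlock(&list_lock);

	copy = calloc(i ? i : 1, sizeof(*copy));
	if (!copy)
		return NULL;
	*n = i;

	pthread_mutex_lock(&list_lock);
	for (i = 0, it = head; it && i < *n; it = it->next)
		copy[i++] = it->payload;
	pthread_mutex_unlock(&list_lock);

	*n = i;		/* clamp: list may have shrunk meanwhile */
	return copy;
}

int main(void)
{
	struct item c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
	size_t n;
	int *s = snapshot(&a, &n);

	if (s)
		printf("copied %zu items, first %d\n", n, s[0]);
	free(s);
	return 0;
}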
@@ -2466,11 +2532,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
  */
 static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
 {
-       struct hci_conn_params *params;
+       struct conn_params *params;
        struct bdaddr_list *b, *t;
        u8 num_entries = 0;
        bool pend_conn, pend_report;
        u8 filter_policy;
+       size_t i, n;
        int err;
 
        /* Pause advertising if resolving list can be used as controllers
@@ -2504,6 +2571,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
                if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
                        continue;
 
+               /* Pointers not dereferenced, no locks needed */
                pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
                                                      &b->bdaddr,
                                                      b->bdaddr_type);
@@ -2532,23 +2600,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
         * available accept list entries in the controller, then
         * just abort and return filter policy value to not use the
         * accept list.
+        *
+        * The list and params may be mutated while we wait for events,
+        * so make a copy and iterate it.
         */
-       list_for_each_entry(params, &hdev->pend_le_conns, action) {
-               err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
-               if (err)
+
+       params = conn_params_copy(&hdev->pend_le_conns, &n);
+       if (!params) {
+               err = -ENOMEM;
+               goto done;
+       }
+
+       for (i = 0; i < n; ++i) {
+               err = hci_le_add_accept_list_sync(hdev, &params[i],
+                                                 &num_entries);
+               if (err) {
+                       kvfree(params);
                        goto done;
+               }
        }
 
+       kvfree(params);
+
        /* After adding all new pending connections, walk through
         * the list of pending reports and also add these to the
         * accept list if there is still space. Abort if space runs out.
         */
-       list_for_each_entry(params, &hdev->pend_le_reports, action) {
-               err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
-               if (err)
+
+       params = conn_params_copy(&hdev->pend_le_reports, &n);
+       if (!params) {
+               err = -ENOMEM;
+               goto done;
+       }
+
+       for (i = 0; i < n; ++i) {
+               err = hci_le_add_accept_list_sync(hdev, &params[i],
+                                                 &num_entries);
+               if (err) {
+                       kvfree(params);
                        goto done;
+               }
        }
 
+       kvfree(params);
+
        /* Use the allowlist unless the following conditions are all true:
         * - We are not currently suspending
         * - There are 1 or more ADV monitors registered and it's not offloaded
@@ -4837,12 +4932,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
        struct hci_conn_params *p;
 
        list_for_each_entry(p, &hdev->le_conn_params, list) {
+               hci_pend_le_list_del_init(p);
                if (p->conn) {
                        hci_conn_drop(p->conn);
                        hci_conn_put(p->conn);
                        p->conn = NULL;
                }
-               list_del_init(&p->action);
        }
 
        BT_DBG("All LE pending actions cleared");
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 0e6cc57..505d622 100644
@@ -123,8 +123,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
 {
        struct iso_conn *conn = hcon->iso_data;
 
-       if (conn)
+       if (conn) {
+               if (!conn->hcon)
+                       conn->hcon = hcon;
                return conn;
+       }
 
        conn = kzalloc(sizeof(*conn), GFP_KERNEL);
        if (!conn)
@@ -300,14 +303,13 @@ static int iso_connect_bis(struct sock *sk)
                goto unlock;
        }
 
-       hci_dev_unlock(hdev);
-       hci_dev_put(hdev);
+       lock_sock(sk);
 
        err = iso_chan_add(conn, sk, NULL);
-       if (err)
-               return err;
-
-       lock_sock(sk);
+       if (err) {
+               release_sock(sk);
+               goto unlock;
+       }
 
        /* Update source addr of the socket */
        bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -321,7 +323,6 @@ static int iso_connect_bis(struct sock *sk)
        }
 
        release_sock(sk);
-       return err;
 
 unlock:
        hci_dev_unlock(hdev);
@@ -389,14 +390,13 @@ static int iso_connect_cis(struct sock *sk)
                goto unlock;
        }
 
-       hci_dev_unlock(hdev);
-       hci_dev_put(hdev);
+       lock_sock(sk);
 
        err = iso_chan_add(conn, sk, NULL);
-       if (err)
-               return err;
-
-       lock_sock(sk);
+       if (err) {
+               release_sock(sk);
+               goto unlock;
+       }
 
        /* Update source addr of the socket */
        bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -413,7 +413,6 @@ static int iso_connect_cis(struct sock *sk)
        }
 
        release_sock(sk);
-       return err;
 
 unlock:
        hci_dev_unlock(hdev);
@@ -1072,8 +1071,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
                            size_t len)
 {
        struct sock *sk = sock->sk;
-       struct iso_conn *conn = iso_pi(sk)->conn;
        struct sk_buff *skb, **frag;
+       size_t mtu;
        int err;
 
        BT_DBG("sock %p, sk %p", sock, sk);
@@ -1085,11 +1084,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
        if (msg->msg_flags & MSG_OOB)
                return -EOPNOTSUPP;
 
-       if (sk->sk_state != BT_CONNECTED)
+       lock_sock(sk);
+
+       if (sk->sk_state != BT_CONNECTED) {
+               release_sock(sk);
                return -ENOTCONN;
+       }
+
+       mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+
+       release_sock(sk);
 
-       skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
-                            HCI_ISO_DATA_HDR_SIZE, 0);
+       skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);
        if (IS_ERR(skb))
                return PTR_ERR(skb);
 
@@ -1102,8 +1108,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
        while (len) {
                struct sk_buff *tmp;
 
-               tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
-                                    0, 0);
+               tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0);
                if (IS_ERR(tmp)) {
                        kfree_skb(skb);
                        return PTR_ERR(tmp);
@@ -1158,15 +1163,19 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
        BT_DBG("sk %p", sk);
 
        if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+               lock_sock(sk);
                switch (sk->sk_state) {
                case BT_CONNECT2:
-                       lock_sock(sk);
                        iso_conn_defer_accept(pi->conn->hcon);
                        sk->sk_state = BT_CONFIG;
                        release_sock(sk);
                        return 0;
                case BT_CONNECT:
+                       release_sock(sk);
                        return iso_connect_cis(sk);
+               default:
+                       release_sock(sk);
+                       break;
                }
        }
 
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f7b2d09..d449803 100644
@@ -1297,15 +1297,15 @@ static void restart_le_actions(struct hci_dev *hdev)
                /* Needed for AUTO_OFF case where might not "really"
                 * have been powered off.
                 */
-               list_del_init(&p->action);
+               hci_pend_le_list_del_init(p);
 
                switch (p->auto_connect) {
                case HCI_AUTO_CONN_DIRECT:
                case HCI_AUTO_CONN_ALWAYS:
-                       list_add(&p->action, &hdev->pend_le_conns);
+                       hci_pend_le_list_add(p, &hdev->pend_le_conns);
                        break;
                case HCI_AUTO_CONN_REPORT:
-                       list_add(&p->action, &hdev->pend_le_reports);
+                       hci_pend_le_list_add(p, &hdev->pend_le_reports);
                        break;
                default:
                        break;
@@ -5169,7 +5169,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
                goto unlock;
        }
 
-       params->flags = current_flags;
+       WRITE_ONCE(params->flags, current_flags);
        status = MGMT_STATUS_SUCCESS;
 
        /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
@@ -7285,7 +7285,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
 
        bt_dev_dbg(hdev, "err %d", err);
 
-       memcpy(&rp.addr, &cp->addr.bdaddr, sizeof(rp.addr));
+       memcpy(&rp.addr, &cp->addr, sizeof(rp.addr));
 
        status = mgmt_status(err);
        if (status == MGMT_STATUS_SUCCESS) {
@@ -7580,7 +7580,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
        if (params->auto_connect == auto_connect)
                return 0;
 
-       list_del_init(&params->action);
+       hci_pend_le_list_del_init(params);
 
        switch (auto_connect) {
        case HCI_AUTO_CONN_DISABLED:
@@ -7589,18 +7589,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
                 * connect to device, keep connecting.
                 */
                if (params->explicit_connect)
-                       list_add(&params->action, &hdev->pend_le_conns);
+                       hci_pend_le_list_add(params, &hdev->pend_le_conns);
                break;
        case HCI_AUTO_CONN_REPORT:
                if (params->explicit_connect)
-                       list_add(&params->action, &hdev->pend_le_conns);
+                       hci_pend_le_list_add(params, &hdev->pend_le_conns);
                else
-                       list_add(&params->action, &hdev->pend_le_reports);
+                       hci_pend_le_list_add(params, &hdev->pend_le_reports);
                break;
        case HCI_AUTO_CONN_DIRECT:
        case HCI_AUTO_CONN_ALWAYS:
                if (!is_connected(hdev, addr, addr_type))
-                       list_add(&params->action, &hdev->pend_le_conns);
+                       hci_pend_le_list_add(params, &hdev->pend_le_conns);
                break;
        }
 
@@ -7823,9 +7823,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
                        goto unlock;
                }
 
-               list_del(&params->action);
-               list_del(&params->list);
-               kfree(params);
+               hci_conn_params_free(params);
 
                device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
        } else {
@@ -7856,9 +7854,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
                                p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
                                continue;
                        }
-                       list_del(&p->action);
-                       list_del(&p->list);
-                       kfree(p);
+                       hci_conn_params_free(p);
                }
 
                bt_dev_dbg(hdev, "All LE connection parameters were removed");
index cd1a27a..7762604 100644
@@ -126,8 +126,11 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
        struct hci_dev *hdev = hcon->hdev;
        struct sco_conn *conn = hcon->sco_data;
 
-       if (conn)
+       if (conn) {
+               if (!conn->hcon)
+                       conn->hcon = hcon;
                return conn;
+       }
 
        conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);
        if (!conn)
@@ -268,21 +271,21 @@ static int sco_connect(struct sock *sk)
                goto unlock;
        }
 
-       hci_dev_unlock(hdev);
-       hci_dev_put(hdev);
-
        conn = sco_conn_add(hcon);
        if (!conn) {
                hci_conn_drop(hcon);
-               return -ENOMEM;
+               err = -ENOMEM;
+               goto unlock;
        }
 
-       err = sco_chan_add(conn, sk, NULL);
-       if (err)
-               return err;
-
        lock_sock(sk);
 
+       err = sco_chan_add(conn, sk, NULL);
+       if (err) {
+               release_sock(sk);
+               goto unlock;
+       }
+
        /* Update source addr of the socket */
        bacpy(&sco_pi(sk)->src, &hcon->src);
 
@@ -296,8 +299,6 @@ static int sco_connect(struct sock *sk)
 
        release_sock(sk);
 
-       return err;
-
 unlock:
        hci_dev_unlock(hdev);
        hci_dev_put(hdev);
index 9ba3568..9168114 100644
@@ -1526,6 +1526,12 @@ static int bcm_release(struct socket *sock)
 
        lock_sock(sk);
 
+#if IS_ENABLED(CONFIG_PROC_FS)
+       /* remove procfs entry */
+       if (net->can.bcmproc_dir && bo->bcm_proc_read)
+               remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+#endif /* CONFIG_PROC_FS */
+
        list_for_each_entry_safe(op, next, &bo->tx_ops, list)
                bcm_remove_op(op);
 
@@ -1561,12 +1567,6 @@ static int bcm_release(struct socket *sock)
        list_for_each_entry_safe(op, next, &bo->rx_ops, list)
                bcm_remove_op(op);
 
-#if IS_ENABLED(CONFIG_PROC_FS)
-       /* remove procfs entry */
-       if (net->can.bcmproc_dir && bo->bcm_proc_read)
-               remove_proc_entry(bo->procname, net->can.bcmproc_dir);
-#endif /* CONFIG_PROC_FS */
-
        /* remove device reference */
        if (bo->bound) {
                bo->bound   = 0;
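Moving remove_proc_entry() to the top of bcm_release() is the usual unpublish-before-teardown order: remove_proc_entry() waits for in-flight readers, so once it returns no procfs read can still be walking the op lists that are torn down below it. In effect:

        /* 1. unpublish: waits for readers already inside the handler */
        remove_proc_entry(bo->procname, net->can.bcmproc_dir);

        /* 2. only now free what the procfs show handler would walk */
        list_for_each_entry_safe(op, next, &bo->tx_ops, list)
                bcm_remove_op(op);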
index 99770ed..f02b5d3 100644
@@ -188,12 +188,6 @@ static bool isotp_register_rxid(struct isotp_sock *so)
        return (isotp_bc_flags(so) == 0);
 }
 
-static bool isotp_register_txecho(struct isotp_sock *so)
-{
-       /* all modes but SF_BROADCAST register for tx echo skbs */
-       return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
-}
-
 static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
 {
        struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -1209,7 +1203,7 @@ static int isotp_release(struct socket *sock)
        lock_sock(sk);
 
        /* remove current filters & unregister */
-       if (so->bound && isotp_register_txecho(so)) {
+       if (so->bound) {
                if (so->ifindex) {
                        struct net_device *dev;
 
@@ -1332,14 +1326,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
                                isotp_rcv, sk, "isotp", sk);
 
-       if (isotp_register_txecho(so)) {
-               /* no consecutive frame echo skb in flight */
-               so->cfecho = 0;
+       /* no consecutive frame echo skb in flight */
+       so->cfecho = 0;
 
-               /* register for echo skb's */
-               can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
-                               isotp_rcv_echo, sk, "isotpe", sk);
-       }
+       /* register for echo skb's */
+       can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+                       isotp_rcv_echo, sk, "isotpe", sk);
 
        dev_put(dev);
 
@@ -1560,7 +1552,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
        case NETDEV_UNREGISTER:
                lock_sock(sk);
                /* remove current filters & unregister */
-               if (so->bound && isotp_register_txecho(so)) {
+               if (so->bound) {
                        if (isotp_register_rxid(so))
                                can_rx_unregister(dev_net(dev), dev, so->rxid,
                                                  SINGLE_MASK(so->rxid),
index 15c79b0..d50c3f3 100644
@@ -84,6 +84,8 @@ struct raw_sock {
        struct sock sk;
        int bound;
        int ifindex;
+       struct net_device *dev;
+       netdevice_tracker dev_tracker;
        struct list_head notifier;
        int loopback;
        int recv_own_msgs;
@@ -277,21 +279,24 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
        if (!net_eq(dev_net(dev), sock_net(sk)))
                return;
 
-       if (ro->ifindex != dev->ifindex)
+       if (ro->dev != dev)
                return;
 
        switch (msg) {
        case NETDEV_UNREGISTER:
                lock_sock(sk);
                /* remove current filters & unregister */
-               if (ro->bound)
+               if (ro->bound) {
                        raw_disable_allfilters(dev_net(dev), dev, sk);
+                       netdev_put(dev, &ro->dev_tracker);
+               }
 
                if (ro->count > 1)
                        kfree(ro->filter);
 
                ro->ifindex = 0;
                ro->bound = 0;
+               ro->dev = NULL;
                ro->count = 0;
                release_sock(sk);
 
@@ -337,6 +342,7 @@ static int raw_init(struct sock *sk)
 
        ro->bound            = 0;
        ro->ifindex          = 0;
+       ro->dev              = NULL;
 
        /* set default filter to single entry dfilter */
        ro->dfilter.can_id   = 0;
@@ -383,18 +389,14 @@ static int raw_release(struct socket *sock)
        list_del(&ro->notifier);
        spin_unlock(&raw_notifier_lock);
 
+       rtnl_lock();
        lock_sock(sk);
 
        /* remove current filters & unregister */
        if (ro->bound) {
-               if (ro->ifindex) {
-                       struct net_device *dev;
-
-                       dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-                       if (dev) {
-                               raw_disable_allfilters(dev_net(dev), dev, sk);
-                               dev_put(dev);
-                       }
+               if (ro->dev) {
+                       raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
+                       netdev_put(ro->dev, &ro->dev_tracker);
                } else {
                        raw_disable_allfilters(sock_net(sk), NULL, sk);
                }
@@ -405,6 +407,7 @@ static int raw_release(struct socket *sock)
 
        ro->ifindex = 0;
        ro->bound = 0;
+       ro->dev = NULL;
        ro->count = 0;
        free_percpu(ro->uniq);
 
@@ -412,6 +415,8 @@ static int raw_release(struct socket *sock)
        sock->sk = NULL;
 
        release_sock(sk);
+       rtnl_unlock();
+
        sock_put(sk);
 
        return 0;
@@ -422,6 +427,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
        struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
        struct sock *sk = sock->sk;
        struct raw_sock *ro = raw_sk(sk);
+       struct net_device *dev = NULL;
        int ifindex;
        int err = 0;
        int notify_enetdown = 0;
@@ -431,24 +437,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
        if (addr->can_family != AF_CAN)
                return -EINVAL;
 
+       rtnl_lock();
        lock_sock(sk);
 
        if (ro->bound && addr->can_ifindex == ro->ifindex)
                goto out;
 
        if (addr->can_ifindex) {
-               struct net_device *dev;
-
                dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
                if (!dev) {
                        err = -ENODEV;
                        goto out;
                }
                if (dev->type != ARPHRD_CAN) {
-                       dev_put(dev);
                        err = -ENODEV;
-                       goto out;
+                       goto out_put_dev;
                }
+
                if (!(dev->flags & IFF_UP))
                        notify_enetdown = 1;
 
@@ -456,7 +461,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
 
                /* filters set by default/setsockopt */
                err = raw_enable_allfilters(sock_net(sk), dev, sk);
-               dev_put(dev);
+               if (err)
+                       goto out_put_dev;
+
        } else {
                ifindex = 0;
 
@@ -467,26 +474,30 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
        if (!err) {
                if (ro->bound) {
                        /* unregister old filters */
-                       if (ro->ifindex) {
-                               struct net_device *dev;
-
-                               dev = dev_get_by_index(sock_net(sk),
-                                                      ro->ifindex);
-                               if (dev) {
-                                       raw_disable_allfilters(dev_net(dev),
-                                                              dev, sk);
-                                       dev_put(dev);
-                               }
+                       if (ro->dev) {
+                               raw_disable_allfilters(dev_net(ro->dev),
+                                                      ro->dev, sk);
+                               /* drop reference to old ro->dev */
+                               netdev_put(ro->dev, &ro->dev_tracker);
                        } else {
                                raw_disable_allfilters(sock_net(sk), NULL, sk);
                        }
                }
                ro->ifindex = ifindex;
                ro->bound = 1;
+               /* bind() ok -> hold a reference for new ro->dev */
+               ro->dev = dev;
+               if (ro->dev)
+                       netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL);
        }
 
- out:
+out_put_dev:
+       /* remove potential reference from dev_get_by_index() */
+       if (dev)
+               dev_put(dev);
+out:
        release_sock(sk);
+       rtnl_unlock();
 
        if (notify_enetdown) {
                sk->sk_err = ENETDOWN;
@@ -553,9 +564,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                rtnl_lock();
                lock_sock(sk);
 
-               if (ro->bound && ro->ifindex) {
-                       dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-                       if (!dev) {
+               dev = ro->dev;
+               if (ro->bound && dev) {
+                       if (dev->reg_state != NETREG_REGISTERED) {
                                if (count > 1)
                                        kfree(filter);
                                err = -ENODEV;
@@ -596,7 +607,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->count  = count;
 
  out_fil:
-               dev_put(dev);
                release_sock(sk);
                rtnl_unlock();
 
@@ -614,9 +624,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                rtnl_lock();
                lock_sock(sk);
 
-               if (ro->bound && ro->ifindex) {
-                       dev = dev_get_by_index(sock_net(sk), ro->ifindex);
-                       if (!dev) {
+               dev = ro->dev;
+               if (ro->bound && dev) {
+                       if (dev->reg_state != NETREG_REGISTERED) {
                                err = -ENODEV;
                                goto out_err;
                        }
@@ -640,7 +650,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
                ro->err_mask = err_mask;
 
  out_err:
-               dev_put(dev);
                release_sock(sk);
                rtnl_unlock();
 
@@ -873,7 +882,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
 
        skb->dev = dev;
        skb->priority = sk->sk_priority;
-       skb->mark = sk->sk_mark;
+       skb->mark = READ_ONCE(sk->sk_mark);
        skb->tstamp = sockc.transmit_time;
 
        skb_setup_tx_timestamp(skb, sockc.tsflags);
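The raw socket now pins its bound device with a netdevice_tracker and takes rtnl_lock in bind/release, so the cached ro->dev pointer cannot race NETDEV_UNREGISTER. The tracked-reference pairing used in the hunks, shown in isolation:

        struct net_device *dev;
        netdevice_tracker tracker;

        dev = dev_get_by_index(net, ifindex);   /* temporary lookup ref */
        if (!dev)
                return -ENODEV;
        netdev_hold(dev, &tracker, GFP_KERNEL); /* long-lived, tracked ref */
        dev_put(dev);                           /* drop the lookup ref */
        ...
        netdev_put(dev, &tracker);              /* release the tracked ref */

The tracker makes leaked references attributable: with CONFIG_NET_DEV_REFCNT_TRACKER the kernel can report which holder failed to call netdev_put().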
index cd7b0bf..5eb4898 100644
@@ -1123,6 +1123,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
                return true;
        }
 }
+EXPORT_SYMBOL(ceph_addr_is_blank);
 
 int ceph_addr_port(const struct ceph_entity_addr *addr)
 {
index 1a888b8..1df1d29 100644
@@ -390,6 +390,8 @@ static int head_onwire_len(int ctrl_len, bool secure)
        int head_len;
        int rem_len;
 
+       BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);
+
        if (secure) {
                head_len = CEPH_PREAMBLE_SECURE_LEN;
                if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
@@ -408,6 +410,10 @@ static int head_onwire_len(int ctrl_len, bool secure)
 static int __tail_onwire_len(int front_len, int middle_len, int data_len,
                             bool secure)
 {
+       BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
+              middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
+              data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);
+
        if (!front_len && !middle_len && !data_len)
                return 0;
 
@@ -520,29 +526,34 @@ static int decode_preamble(void *p, struct ceph_frame_desc *desc)
                desc->fd_aligns[i] = ceph_decode_16(&p);
        }
 
-       /*
-        * This would fire for FRAME_TAG_WAIT (it has one empty
-        * segment), but we should never get it as client.
-        */
-       if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
-               pr_err("last segment empty\n");
+       if (desc->fd_lens[0] < 0 ||
+           desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
+               pr_err("bad control segment length %d\n", desc->fd_lens[0]);
                return -EINVAL;
        }
-
-       if (desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
-               pr_err("control segment too big %d\n", desc->fd_lens[0]);
+       if (desc->fd_lens[1] < 0 ||
+           desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
+               pr_err("bad front segment length %d\n", desc->fd_lens[1]);
                return -EINVAL;
        }
-       if (desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
-               pr_err("front segment too big %d\n", desc->fd_lens[1]);
+       if (desc->fd_lens[2] < 0 ||
+           desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
+               pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
                return -EINVAL;
        }
-       if (desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
-               pr_err("middle segment too big %d\n", desc->fd_lens[2]);
+       if (desc->fd_lens[3] < 0 ||
+           desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
+               pr_err("bad data segment length %d\n", desc->fd_lens[3]);
                return -EINVAL;
        }
-       if (desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
-               pr_err("data segment too big %d\n", desc->fd_lens[3]);
+
+       /*
+        * This would fire for FRAME_TAG_WAIT (it has one empty
+        * segment), but we should never get it as client.
+        */
+       if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
+               pr_err("last segment empty, segment count %d\n",
+                      desc->fd_seg_cnt);
                return -EINVAL;
        }
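The reordered validation checks every decoded segment length, including negative values, before the empty-last-segment test. The four msgr2 preamble segments and the caps being enforced, summarized:

        /* fd_lens[] as decoded from the frame preamble (signed ints):
         *   fd_lens[0]  control  0..CEPH_MSG_MAX_CONTROL_LEN
         *   fd_lens[1]  front    0..CEPH_MSG_MAX_FRONT_LEN
         *   fd_lens[2]  middle   0..CEPH_MSG_MAX_MIDDLE_LEN
         *   fd_lens[3]  data     0..CEPH_MSG_MAX_DATA_LEN
         * Out-of-range values now fail here, before the head/tail
         * length helpers (which BUG_ON the same bounds) are reached.
         */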
 
index 11c04e7..658a6f2 100644
@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
        int ret;
 
        dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
-       ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+       ret = wait_for_completion_killable(&lreq->reg_commit_wait);
        return ret ?: lreq->reg_commit_error;
 }
 
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+                                    unsigned long timeout)
 {
-       int ret;
+       long left;
 
        dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
-       ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
-       return ret ?: lreq->notify_finish_error;
+       left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+                                               ceph_timeout_jiffies(timeout));
+       if (left <= 0)
+               left = left ?: -ETIMEDOUT;
+       else
+               left = lreq->notify_finish_error; /* completed */
+
+       return left;
 }
 
 /*
@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
        linger_submit(lreq);
        ret = linger_reg_commit_wait(lreq);
        if (!ret)
-               ret = linger_notify_finish_wait(lreq);
+               ret = linger_notify_finish_wait(lreq,
+                                msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
        else
                dout("lreq %p failed to initiate notify %d\n", lreq, ret);
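Switching to wait_for_completion_killable_timeout() changes the return convention the caller must decode: a negative value (-ERESTARTSYS) means a fatal signal arrived, 0 means the timeout expired, and a positive value is the remaining jiffies on completion — which is what the `left <= 0` folding above maps onto. Condensed (done_error is illustrative):

        long left = wait_for_completion_killable_timeout(&done, t);

        if (left < 0)           /* fatal signal: -ERESTARTSYS */
                return left;
        if (left == 0)          /* timer expired first */
                return -ETIMEDOUT;
        return done_error;      /* completed, with `left` jiffies to spare */

ceph_timeout_jiffies() turns a zero timeout into MAX_SCHEDULE_TIMEOUT, so a caller configured with "no timeout" still waits indefinitely, just killably.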
 
index d417253..cca7594 100644
@@ -496,8 +496,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
                return ERR_PTR(-EPERM);
 
        nla_for_each_nested(nla, nla_stgs, rem) {
-               if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+               if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
+                       if (nla_len(nla) != sizeof(u32))
+                               return ERR_PTR(-EINVAL);
                        nr_maps++;
+               }
        }
 
        diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
index 06ba0e5..28a5959 100644
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
        if (unlikely(data_end > data_hard_end))
                return -EINVAL;
 
-       /* ALL drivers MUST init xdp->frame_sz, chicken check below */
-       if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
-               WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
-               return -EINVAL;
-       }
-
        if (unlikely(data_end < xdp->data + ETH_HLEN))
                return -EINVAL;
 
index 805b738..6aef976 100644
@@ -63,4 +63,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
 EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
 EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb);
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
index 3ad4e03..00c94d9 100644
@@ -2268,13 +2268,27 @@ out_err:
        return err;
 }
 
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
-                       struct netlink_ext_ack *exterr)
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+                            struct netlink_ext_ack *exterr)
 {
-       return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
+       const struct ifinfomsg *ifmp;
+       const struct nlattr *attrs;
+       size_t len;
+
+       ifmp = nla_data(nla_peer);
+       attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
+       len = nla_len(nla_peer) - sizeof(struct ifinfomsg);
+
+       if (ifmp->ifi_index < 0) {
+               NL_SET_ERR_MSG_ATTR(exterr, nla_peer,
+                                   "ifindex can't be negative");
+               return -EINVAL;
+       }
+
+       return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy,
                                    exterr);
 }
-EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
 
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
@@ -3547,6 +3561,9 @@ replay:
        if (ifm->ifi_index > 0) {
                link_specified = true;
                dev = __dev_get_by_index(net, ifm->ifi_index);
+       } else if (ifm->ifi_index < 0) {
+               NL_SET_ERR_MSG(extack, "ifindex can't be negative");
+               return -EINVAL;
        } else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
                link_specified = true;
                dev = rtnl_dev_get(net, tb);
@@ -5140,13 +5157,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
        br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
        if (br_spec) {
                nla_for_each_nested(attr, br_spec, rem) {
-                       if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+                       if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
                                if (nla_len(attr) < sizeof(flags))
                                        return -EINVAL;
 
                                have_flags = true;
                                flags = nla_get_u16(attr);
-                               break;
+                       }
+
+                       if (nla_type(attr) == IFLA_BRIDGE_MODE) {
+                               if (nla_len(attr) < sizeof(u16))
+                                       return -EINVAL;
                        }
                }
        }
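rtnl_nla_parse_ifinfomsg() bakes in the layout that veth-style peer attributes use: the nested attribute's payload begins with a struct ifinfomsg header, followed by ordinary IFLA_* attributes for the peer device. The pointer arithmetic above corresponds to:

        /* IFLA_*_PEER payload layout:
         *
         *   +------------------+----------------------------+
         *   | struct ifinfomsg | nested IFLA_* attributes   |
         *   +------------------+----------------------------+
         *   ^ nla_data(nla_peer)
         *                      ^ nla_data(nla_peer) + sizeof(struct ifinfomsg)
         */
        attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
        len   = nla_len(nla_peer) - sizeof(struct ifinfomsg);

Centralizing the parse also gives every caller the new ifi_index < 0 rejection for free.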
index 6c5915e..a298992 100644
@@ -4261,6 +4261,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
 
        skb_push(skb, -skb_network_offset(skb) + offset);
 
+       /* Ensure the head is writeable before touching the shared info */
+       err = skb_unclone(skb, GFP_ATOMIC);
+       if (err)
+               goto err_linearize;
+
        skb_shinfo(skb)->frag_list = NULL;
 
        while (list_skb) {
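skb_unclone() is what makes the following skb_shinfo(skb)->frag_list write safe: if the skb shares its head with a clone, the shared info area is shared too, so it must be copied before being modified. Semantically, roughly:

        /* skb_unclone(skb, gfp), in effect: */
        if (skb_cloned(skb)) {
                /* reallocate head + shared info as a private copy */
                if (pskb_expand_head(skb, 0, 0, gfp))
                        return -ENOMEM;
        }
        /* head is exclusively ours; shinfo writes no longer hit clones */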
index a29508e..ef1a2eb 100644
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 
 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 {
+       int ret;
+
        static const struct strp_callbacks cb = {
                .rcv_msg        = sk_psock_strp_read,
                .read_sock_done = sk_psock_strp_read_done,
                .parse_msg      = sk_psock_strp_parse,
        };
 
-       return strp_init(&psock->strp, sk, &cb);
+       ret = strp_init(&psock->strp, sk, &cb);
+       if (!ret)
+               sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+       return ret;
 }
 
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 static void sk_psock_done_strp(struct sk_psock *psock)
 {
        /* Parser has been stopped */
-       if (psock->progs.stream_parser)
+       if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
                strp_done(&psock->strp);
 }
 #else
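Strparser teardown now keys off an explicit SK_PSOCK_RX_STRP_ENABLED state bit rather than the presence of a stream_parser program, so strp_done() is only ever called on a parser that strp_init() actually set up. The init/teardown pairing, consolidated from the hunks:

        ret = strp_init(&psock->strp, sk, &cb);
        if (!ret)
                sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
        ...
        /* later, on teardown */
        if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
                strp_done(&psock->strp);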
index 9370fd5..c9cffb7 100644
@@ -429,6 +429,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
 {
        struct __kernel_sock_timeval tv;
        int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+       long val;
 
        if (err)
                return err;
@@ -439,7 +440,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
        if (tv.tv_sec < 0) {
                static int warned __read_mostly;
 
-               *timeo_p = 0;
+               WRITE_ONCE(*timeo_p, 0);
                if (warned < 10 && net_ratelimit()) {
                        warned++;
                        pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
@@ -447,11 +448,12 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
                }
                return 0;
        }
-       *timeo_p = MAX_SCHEDULE_TIMEOUT;
-       if (tv.tv_sec == 0 && tv.tv_usec == 0)
-               return 0;
-       if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
-               *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
+       val = MAX_SCHEDULE_TIMEOUT;
+       if ((tv.tv_sec || tv.tv_usec) &&
+           (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
+               val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
+                                                   USEC_PER_SEC / HZ);
+       WRITE_ONCE(*timeo_p, val);
        return 0;
 }
 
@@ -804,7 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
 void sock_set_priority(struct sock *sk, u32 priority)
 {
        lock_sock(sk);
-       sk->sk_priority = priority;
+       WRITE_ONCE(sk->sk_priority, priority);
        release_sock(sk);
 }
 EXPORT_SYMBOL(sock_set_priority);
@@ -813,9 +815,9 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
 {
        lock_sock(sk);
        if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
-               sk->sk_sndtimeo = secs * HZ;
+               WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
        else
-               sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+               WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
        release_sock(sk);
 }
 EXPORT_SYMBOL(sock_set_sndtimeo);
@@ -988,7 +990,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
 static void __sock_set_mark(struct sock *sk, u32 val)
 {
        if (val != sk->sk_mark) {
-               sk->sk_mark = val;
+               WRITE_ONCE(sk->sk_mark, val);
                sk_dst_reset(sk);
        }
 }
@@ -1007,7 +1009,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
        bytes = round_down(bytes, PAGE_SIZE);
 
        WARN_ON(bytes > sk->sk_reserved_mem);
-       sk->sk_reserved_mem -= bytes;
+       WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
        sk_mem_reclaim(sk);
 }
 
@@ -1044,7 +1046,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
        }
        sk->sk_forward_alloc += pages << PAGE_SHIFT;
 
-       sk->sk_reserved_mem += pages << PAGE_SHIFT;
+       WRITE_ONCE(sk->sk_reserved_mem,
+                  sk->sk_reserved_mem + (pages << PAGE_SHIFT));
 
        return 0;
 }
@@ -1213,7 +1216,7 @@ set_sndbuf:
                if ((val >= 0 && val <= 6) ||
                    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
                    sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
-                       sk->sk_priority = val;
+                       WRITE_ONCE(sk->sk_priority, val);
                else
                        ret = -EPERM;
                break;
@@ -1438,7 +1441,8 @@ set_sndbuf:
                        cmpxchg(&sk->sk_pacing_status,
                                SK_PACING_NONE,
                                SK_PACING_NEEDED);
-               sk->sk_max_pacing_rate = ulval;
+               /* Pairs with READ_ONCE() from sk_getsockopt() */
+               WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
                sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
                break;
                }
@@ -1533,7 +1537,9 @@ set_sndbuf:
                }
                if ((u8)val == SOCK_TXREHASH_DEFAULT)
                        val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
-               /* Paired with READ_ONCE() in tcp_rtx_synack() */
+               /* Paired with READ_ONCE() in tcp_rtx_synack()
+                * and sk_getsockopt().
+                */
                WRITE_ONCE(sk->sk_txrehash, (u8)val);
                break;
 
@@ -1633,11 +1639,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                break;
 
        case SO_SNDBUF:
-               v.val = sk->sk_sndbuf;
+               v.val = READ_ONCE(sk->sk_sndbuf);
                break;
 
        case SO_RCVBUF:
-               v.val = sk->sk_rcvbuf;
+               v.val = READ_ONCE(sk->sk_rcvbuf);
                break;
 
        case SO_REUSEADDR:
@@ -1679,7 +1685,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                break;
 
        case SO_PRIORITY:
-               v.val = sk->sk_priority;
+               v.val = READ_ONCE(sk->sk_priority);
                break;
 
        case SO_LINGER:
@@ -1717,16 +1723,18 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 
        case SO_RCVTIMEO_OLD:
        case SO_RCVTIMEO_NEW:
-               lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
+               lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
+                                     SO_RCVTIMEO_OLD == optname);
                break;
 
        case SO_SNDTIMEO_OLD:
        case SO_SNDTIMEO_NEW:
-               lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
+               lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
+                                     SO_SNDTIMEO_OLD == optname);
                break;
 
        case SO_RCVLOWAT:
-               v.val = sk->sk_rcvlowat;
+               v.val = READ_ONCE(sk->sk_rcvlowat);
                break;
 
        case SO_SNDLOWAT:
@@ -1770,7 +1778,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                spin_unlock(&sk->sk_peer_lock);
 
                if (!peer_pid)
-                       return -ESRCH;
+                       return -ENODATA;
 
                pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
                put_pid(peer_pid);
@@ -1843,7 +1851,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                                                         optval, optlen, len);
 
        case SO_MARK:
-               v.val = sk->sk_mark;
+               v.val = READ_ONCE(sk->sk_mark);
                break;
 
        case SO_RCVMARK:
@@ -1862,7 +1870,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                if (!sock->ops->set_peek_off)
                        return -EOPNOTSUPP;
 
-               v.val = sk->sk_peek_off;
+               v.val = READ_ONCE(sk->sk_peek_off);
                break;
        case SO_NOFCS:
                v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1892,7 +1900,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 
 #ifdef CONFIG_NET_RX_BUSY_POLL
        case SO_BUSY_POLL:
-               v.val = sk->sk_ll_usec;
+               v.val = READ_ONCE(sk->sk_ll_usec);
                break;
        case SO_PREFER_BUSY_POLL:
                v.val = READ_ONCE(sk->sk_prefer_busy_poll);
@@ -1900,12 +1908,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
 #endif
 
        case SO_MAX_PACING_RATE:
+               /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
                if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
                        lv = sizeof(v.ulval);
-                       v.ulval = sk->sk_max_pacing_rate;
+                       v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
                } else {
                        /* 32bit version */
-                       v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+                       v.val = min_t(unsigned long, ~0U,
+                                     READ_ONCE(sk->sk_max_pacing_rate));
                }
                break;
 
@@ -1973,11 +1983,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
                break;
 
        case SO_RESERVE_MEM:
-               v.val = sk->sk_reserved_mem;
+               v.val = READ_ONCE(sk->sk_reserved_mem);
                break;
 
        case SO_TXREHASH:
-               v.val = sk->sk_txrehash;
+               /* Paired with WRITE_ONCE() in sk_setsockopt() */
+               v.val = READ_ONCE(sk->sk_txrehash);
                break;
 
        default:
@@ -3148,7 +3159,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
        if (mem_cgroup_sockets_enabled && sk->sk_memcg)
                mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
 
-       if (sk_under_memory_pressure(sk) &&
+       if (sk_under_global_memory_pressure(sk) &&
            (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
                sk_leave_memory_pressure(sk);
 }
@@ -3168,7 +3179,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
 
 int sk_set_peek_off(struct sock *sk, int val)
 {
-       sk->sk_peek_off = val;
+       WRITE_ONCE(sk->sk_peek_off, val);
        return 0;
 }
 EXPORT_SYMBOL_GPL(sk_set_peek_off);
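The sock.c hunks are one batch of data-race annotations: fields such as sk_priority, sk_mark, the buffer sizes, and the timeouts are written under the socket lock but read locklessly from getsockopt and fast paths, so both sides get marked. The contract is symmetric and is about tearing and KCSAN, not memory ordering:

        /* writer, typically under lock_sock() */
        WRITE_ONCE(sk->sk_priority, val);

        /* lockless reader elsewhere */
        u32 prio = READ_ONCE(sk->sk_priority);

An unmarked racy access may legally be torn, duplicated, or re-read by the compiler; READ_ONCE()/WRITE_ONCE() force single untorn accesses and document that the race is intentional.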
index 19538d6..8f07fea 100644
@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
        __acquires(&sk->sk_lock.slock)
 {
        lock_sock(sk);
-       preempt_disable();
        rcu_read_lock();
 }
 
@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
        __releases(&sk->sk_lock.slock)
 {
        rcu_read_unlock();
-       preempt_enable();
        release_sock(sk);
 }
 
@@ -148,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
        list_for_each_entry_safe(link, tmp, &psock->link, list) {
                if (link->link_raw == link_raw) {
                        struct bpf_map *map = link->map;
-                       struct bpf_stab *stab = container_of(map, struct bpf_stab,
-                                                            map);
-                       if (psock->saved_data_ready && stab->progs.stream_parser)
+                       struct sk_psock_progs *progs = sock_map_progs(map);
+
+                       if (psock->saved_data_ready && progs->stream_parser)
                                strp_stop = true;
-                       if (psock->saved_data_ready && stab->progs.stream_verdict)
+                       if (psock->saved_data_ready && progs->stream_verdict)
                                verdict_stop = true;
-                       if (psock->saved_data_ready && stab->progs.skb_verdict)
+                       if (psock->saved_data_ready && progs->skb_verdict)
                                verdict_stop = true;
                        list_del(&link->list);
                        sk_psock_free_link(link);
index 41e5ca8..8362130 100644
@@ -741,7 +741,7 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
 __diag_pop();
 
 BTF_SET8_START(xdp_metadata_kfunc_ids)
-#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
+#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, KF_TRUSTED_ARGS)
 XDP_METADATA_KFUNC_xxx
 #undef XDP_METADATA_KFUNC
 BTF_SET8_END(xdp_metadata_kfunc_ids)
index c0c4381..2e6b8c8 100644
@@ -980,7 +980,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
                return -EOPNOTSUPP;
 
        ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
-                                         tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
+                                         tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
                                          NULL);
        if (ret)
                return ret;
index fa80793..a545ad7 100644
@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                                    inet->inet_daddr,
                                                    inet->inet_sport,
                                                    inet->inet_dport);
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        err = dccp_connect(sk);
        rt = NULL;
@@ -432,7 +432,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
        RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
        newinet->mc_index  = inet_iif(skb);
        newinet->mc_ttl    = ip_hdr(skb)->ttl;
-       newinet->inet_id   = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
                goto put_and_exit;
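Converting inet->inet_id to an atomic_t lets the IP identification generator bump it without holding the socket lock. A plausible consumer pattern (an assumption here, not shown in this diff) reserves a run of IDs for a GSO burst in one atomic step:

        /* hypothetical: reserve `segs` consecutive IDs, return the first */
        u16 first_id = (u16)(atomic_add_return(segs, &inet->inet_id) - segs);

The writers shown above only need atomic_set() because they run at socket setup, before the field is visible to any other path.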
index 7249ef2..d29d116 100644
@@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
-               err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
-                              sk->sk_priority);
+               err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
+                              np->tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
index b8a2473..fd2eb14 100644
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 
        /* And store cached results */
        icsk->icsk_pmtu_cookie = pmtu;
-       dp->dccps_mss_cache = cur_mps;
+       WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
 
        return cur_mps;
 }
index f331e59..fcc5c9d 100644
@@ -315,11 +315,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
 __poll_t dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
 {
-       __poll_t mask;
        struct sock *sk = sock->sk;
+       __poll_t mask;
+       u8 shutdown;
+       int state;
 
        sock_poll_wait(file, sock, wait);
-       if (sk->sk_state == DCCP_LISTEN)
+
+       state = inet_sk_state_load(sk);
+       if (state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);
 
        /* Socket is not locked. We are protected from async events
@@ -328,20 +332,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
         */
 
        mask = 0;
-       if (sk->sk_err)
+       if (READ_ONCE(sk->sk_err))
                mask = EPOLLERR;
+       shutdown = READ_ONCE(sk->sk_shutdown);
 
-       if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+       if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
                mask |= EPOLLHUP;
-       if (sk->sk_shutdown & RCV_SHUTDOWN)
+       if (shutdown & RCV_SHUTDOWN)
                mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
        /* Connected? */
-       if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+       if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= EPOLLIN | EPOLLRDNORM;
 
-               if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+               if (!(shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_is_writeable(sk)) {
                                mask |= EPOLLOUT | EPOLLWRNORM;
                        } else {  /* send SIGIO later */
@@ -359,7 +364,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
        }
        return mask;
 }
-
 EXPORT_SYMBOL_GPL(dccp_poll);
 
 int dccp_ioctl(struct sock *sk, int cmd, int *karg)
@@ -630,7 +634,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_GET_CUR_MPS:
-               val = dp->dccps_mss_cache;
+               val = READ_ONCE(dp->dccps_mss_cache);
                break;
        case DCCP_SOCKOPT_AVAILABLE_CCIDS:
                return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +743,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        trace_dccp_probe(sk, len);
 
-       if (len > dp->dccps_mss_cache)
+       if (len > READ_ONCE(dp->dccps_mss_cache))
                return -EMSGSIZE;
 
        lock_sock(sk);
@@ -772,6 +776,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
                goto out_discard;
        }
 
+       /* We need to check dccps_mss_cache after socket is locked. */
+       if (len > dp->dccps_mss_cache) {
+               rc = -EMSGSIZE;
+               goto out_discard;
+       }
+
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_from_msg(skb_put(skb, len), msg, len);
        if (rc != 0)
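dccp_poll() runs without the socket lock, so the rewrite snapshots the state once — inet_sk_state_load() is an acquire load of sk->sk_state — and reads sk_shutdown once, then derives every mask bit from the two locals. That keeps the computed mask internally consistent even while the socket changes underneath:

        int state = inet_sk_state_load(sk);     /* smp_load_acquire */
        u8 shutdown = READ_ONCE(sk->sk_shutdown);

        /* every test below uses `state`/`shutdown`, never a re-read of
         * the live fields, so the checks cannot contradict each other
         * mid-evaluation */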
index 1f00f87..bfed792 100644
@@ -6704,6 +6704,7 @@ void devlink_notify_unregister(struct devlink *devlink)
        struct devlink_param_item *param_item;
        struct devlink_trap_item *trap_item;
        struct devlink_port *devlink_port;
+       struct devlink_linecard *linecard;
        struct devlink_rate *rate_node;
        struct devlink_region *region;
        unsigned long port_index;
@@ -6732,6 +6733,8 @@ void devlink_notify_unregister(struct devlink *devlink)
 
        xa_for_each(&devlink->ports, port_index, devlink_port)
                devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+       list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+               devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
        devlink_notify(devlink, DEVLINK_CMD_DEL);
 }
 
index 0ce8fd3..2f6195d 100644
@@ -1727,8 +1727,15 @@ int dsa_port_phylink_create(struct dsa_port *dp)
            ds->ops->phylink_mac_an_restart)
                dp->pl_config.legacy_pre_march2020 = true;
 
-       if (ds->ops->phylink_get_caps)
+       if (ds->ops->phylink_get_caps) {
                ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
+       } else {
+               /* For legacy drivers */
+               __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+                         dp->pl_config.supported_interfaces);
+               __set_bit(PHY_INTERFACE_MODE_GMII,
+                         dp->pl_config.supported_interfaces);
+       }
 
        pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
                            mode, &dsa_port_phylink_mac_ops);
index 9b2ca2f..02736b8 100644
@@ -340,7 +340,7 @@ lookup_protocol:
        else
                inet->pmtudisc = IP_PMTUDISC_WANT;
 
-       inet->inet_id = 0;
+       atomic_set(&inet->inet_id, 0);
 
        sock_init_data(sock, sk);
 
index 4d1af0c..cb5dbee 100644
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
        reuseport_has_conns_set(sk);
        sk->sk_state = TCP_ESTABLISHED;
        sk_set_txhash(sk);
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        sk_dst_set(sk, &rt->dst);
        err = 0;
index ba06ed4..2be2d49 100644
@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x,
        err = crypto_aead_setkey(aead, key, keylen);
 
 free_key:
-       kfree(key);
+       kfree_sensitive(key);
 
 error:
        return err;
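kfree_sensitive() matters here because the buffer holds concatenated key material: unlike plain kfree(), it wipes the allocation before handing it back to the allocator. In effect, roughly:

        /* kfree_sensitive(key), approximately: */
        memzero_explicit(key, ksize(key));  /* wipe; not optimizable away */
        kfree(key);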
index 0cc19cf..aeebe88 100644
@@ -1019,7 +1019,7 @@ static void reqsk_timer_handler(struct timer_list *t)
 
        icsk = inet_csk(sk_listener);
        net = sock_net(sk_listener);
-       max_syn_ack_retries = icsk->icsk_syn_retries ? :
+       max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
                READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
index b812eb3..f742692 100644
@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
        }
 #endif
 
-       if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+       if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
                goto errout;
 
        if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
        entry.ifindex = sk->sk_bound_dev_if;
        entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
        if (sk_fullsock(sk))
-               entry.mark = sk->sk_mark;
+               entry.mark = READ_ONCE(sk->sk_mark);
        else if (sk->sk_state == TCP_NEW_SYN_RECV)
                entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
        else if (sk->sk_state == TCP_TIME_WAIT)
index e7391bf..0819d60 100644
@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
        spin_lock(lock);
        if (osk) {
                WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
-               ret = sk_hashed(osk);
-               if (ret) {
-                       /* Before deleting the node, we insert a new one to make
-                        * sure that the look-up-sk process would not miss either
-                        * of them and that at least one node would exist in ehash
-                        * table all the time. Otherwise there's a tiny chance
-                        * that lookup process could find nothing in ehash table.
-                        */
-                       __sk_nulls_add_node_tail_rcu(sk, list);
-                       sk_nulls_del_node_init_rcu(osk);
-               }
-               goto unlock;
-       }
-       if (found_dup_sk) {
+               ret = sk_nulls_del_node_init_rcu(osk);
+       } else if (found_dup_sk) {
                *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
                if (*found_dup_sk)
                        ret = false;
@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
        if (ret)
                __sk_nulls_add_node_rcu(sk, list);
 
-unlock:
        spin_unlock(lock);
 
        return ret;
index 4005241..2c1b245 100644
@@ -88,10 +88,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
 }
 EXPORT_SYMBOL_GPL(inet_twsk_put);
 
-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
-                                       struct hlist_nulls_head *list)
+static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+                                  struct hlist_nulls_head *list)
 {
-       hlist_nulls_add_tail_rcu(&tw->tw_node, list);
+       hlist_nulls_add_head_rcu(&tw->tw_node, list);
 }
 
 static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -144,7 +144,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 
        spin_lock(lock);
 
-       inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
+       inet_twsk_add_node_rcu(tw, &ehead->chain);
 
        /* Step 3: Remove SK from hash chain */
        if (__sk_nulls_del_node_init_rcu(sk))
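Both hunks here revert a tail-insertion workaround: established and timewait sockets go back onto the head of their nulls hash chains, the position the RCU lookup's restart logic is designed around. A reader that races with an entry moving chains detects it via the nulls end-marker and simply rewalks, along the lines of the established-socket lookup (match() and slot are illustrative):

        struct hlist_nulls_node *node;
        struct sock *sk;

begin:
        sk_nulls_for_each_rcu(sk, node, &head->chain) {
                if (match(sk))
                        return sk;
        }
        /* the chain ends in a per-bucket nulls value; landing on the
         * wrong one means the entry moved buckets under us: walk again */
        if (get_nulls_value(node) != slot)
                goto begin;
        return NULL;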
index 81a1cce..22a26d1 100644
@@ -548,7 +548,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
                goto err_free_skb;
 
        if (skb->len > dev->mtu + dev->hard_header_len) {
-               pskb_trim(skb, dev->mtu + dev->hard_header_len);
+               if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+                       goto err_free_skb;
                truncate = true;
        }
 
@@ -689,7 +690,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
                goto free_skb;
 
        if (skb->len > dev->mtu + dev->hard_header_len) {
-               pskb_trim(skb, dev->mtu + dev->hard_header_len);
+               if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+                       goto free_skb;
                truncate = true;
        }
 
index 6e70839..6ba1a0f 100644
@@ -184,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
                ip_options_build(skb, &opt->opt, daddr, rt);
        }
 
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        if (!skb->mark)
-               skb->mark = sk->sk_mark;
+               skb->mark = READ_ONCE(sk->sk_mark);
 
        /* Send it out. */
        return ip_local_out(net, skb->sk, skb);
@@ -528,8 +528,8 @@ packet_routed:
                             skb_shinfo(skb)->gso_segs ?: 1);
 
        /* TODO : should we use skb->sk here instead of sk ? */
-       skb->priority = sk->sk_priority;
-       skb->mark = sk->sk_mark;
+       skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = READ_ONCE(sk->sk_mark);
 
        res = ip_local_out(net, sk, skb);
        rcu_read_unlock();
@@ -1158,10 +1158,15 @@ alloc_new_skb:
                        }
 
                        copy = datalen - transhdrlen - fraggap - pagedlen;
+                       /* [!] NOTE: copy will be negative if pagedlen>0
+                        * because then the equation reduces to -fraggap.
+                        */
                        if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
+                       } else if (flags & MSG_SPLICE_PAGES) {
+                               copy = 0;
                        }
 
                        offset += copy;
@@ -1209,6 +1214,10 @@ alloc_new_skb:
                } else if (flags & MSG_SPLICE_PAGES) {
                        struct msghdr *msg = from;
 
+                       err = -EIO;
+                       if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+                               goto error;
+
                        err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
                                                   sk->sk_allocation);
                        if (err < 0)
index 8e97d8d..d41bce8 100644
@@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
        }
        if (inet_sk(sk)->tos != val) {
                inet_sk(sk)->tos = val;
-               sk->sk_priority = rt_tos2priority(val);
+               WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
                sk_dst_reset(sk);
        }
 }
index 92c02c8..586b1b3 100644
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
                .un.frag.__unused       = 0,
                .un.frag.mtu            = htons(mtu),
        };
-       icmph->checksum = ip_compute_csum(icmph, len);
+       icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
        skb_reset_transport_header(skb);
 
        niph = skb_push(skb, sizeof(*niph));
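The checksum fix matters once the ICMP payload is non-linear: ip_compute_csum() only sums a flat buffer, while skb_checksum() walks the skb's paged fragments and frag_list as well; csum_fold() then folds the 32-bit partial sum into the final 16-bit value. The replacement line, expanded:

        __wsum sum = skb_checksum(skb, 0, len, 0); /* covers frags too */
        icmph->checksum = csum_fold(sum);          /* -> 16-bit checksum */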
index 53bfd8a..d1e7d0c 100644
@@ -287,12 +287,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
        switch (skb->protocol) {
        case htons(ETH_P_IP):
-               xfrm_decode_session(skb, &fl, AF_INET);
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET);
                break;
        case htons(ETH_P_IPV6):
-               xfrm_decode_session(skb, &fl, AF_INET6);
                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET6);
                break;
        default:
                goto tx_err;
index f95142e..be5498f 100644
@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
                                     &rtm_dump_nexthop_cb, &filter);
        if (err < 0) {
                if (likely(skb->len))
-                       goto out;
-               goto out_err;
+                       err = skb->len;
        }
 
-out:
-       err = skb->len;
-out_err:
        cb->seq = net->nexthop.seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        return err;
@@ -3367,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
                    dd->filter.res_bucket_nh_id != nhge->nh->id)
                        continue;
 
+               dd->ctx->bucket_index = bucket_index;
                err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
                                         RTM_NEWNEXTHOPBUCKET, portid,
                                         cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                         cb->extack);
-               if (err < 0) {
-                       if (likely(skb->len))
-                               goto out;
-                       goto out_err;
-               }
+               if (err)
+                       return err;
        }
 
        dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
-       bucket_index = 0;
+       dd->ctx->bucket_index = 0;
 
-out:
-       err = skb->len;
-out_err:
-       dd->ctx->bucket_index = bucket_index;
-       return err;
+       return 0;
 }
 
 static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
 
        if (err < 0) {
                if (likely(skb->len))
-                       goto out;
-               goto out_err;
+                       err = skb->len;
        }
 
-out:
-       err = skb->len;
-out_err:
        cb->seq = net->nexthop.seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        return err;
index 7782ff5..cb381f5 100644
@@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
                goto error;
        skb_reserve(skb, hlen);
 
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = sockc->mark;
        skb->tstamp = sockc->transmit_time;
        skb_dst_set(skb, &rt->dst);
index 98d7e6b..92fede3 100644
@@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
                const struct inet_sock *inet = inet_sk(sk);
 
                oif = sk->sk_bound_dev_if;
-               mark = sk->sk_mark;
+               mark = READ_ONCE(sk->sk_mark);
                tos = ip_sock_rt_tos(sk);
                scope = ip_sock_rt_scope(sk);
                prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
@@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
        inet_opt = rcu_dereference(inet->inet_opt);
        if (inet_opt && inet_opt->opt.srr)
                daddr = inet_opt->opt.faddr;
-       flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+       flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
                           ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
                           ip_sock_rt_scope(sk),
                           inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
index e03e087..8ed52e1 100644
@@ -3291,7 +3291,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
                return -EINVAL;
 
        lock_sock(sk);
-       inet_csk(sk)->icsk_syn_retries = val;
+       WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
        release_sock(sk);
        return 0;
 }
@@ -3300,7 +3300,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
 void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
 {
        lock_sock(sk);
-       inet_csk(sk)->icsk_user_timeout = val;
+       WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
        release_sock(sk);
 }
 EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3312,7 +3312,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
        if (val < 1 || val > MAX_TCP_KEEPIDLE)
                return -EINVAL;
 
-       tp->keepalive_time = val * HZ;
+       /* Paired with WRITE_ONCE() in keepalive_time_when() */
+       WRITE_ONCE(tp->keepalive_time, val * HZ);
        if (sock_flag(sk, SOCK_KEEPOPEN) &&
            !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
                u32 elapsed = keepalive_time_elapsed(tp);
@@ -3344,7 +3345,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
                return -EINVAL;
 
        lock_sock(sk);
-       tcp_sk(sk)->keepalive_intvl = val * HZ;
+       WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
        release_sock(sk);
        return 0;
 }
@@ -3356,7 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
                return -EINVAL;
 
        lock_sock(sk);
-       tcp_sk(sk)->keepalive_probes = val;
+       /* Paired with READ_ONCE() in keepalive_probes() */
+       WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
        release_sock(sk);
        return 0;
 }
@@ -3558,19 +3560,19 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
                if (val < 1 || val > MAX_TCP_KEEPINTVL)
                        err = -EINVAL;
                else
-                       tp->keepalive_intvl = val * HZ;
+                       WRITE_ONCE(tp->keepalive_intvl, val * HZ);
                break;
        case TCP_KEEPCNT:
                if (val < 1 || val > MAX_TCP_KEEPCNT)
                        err = -EINVAL;
                else
-                       tp->keepalive_probes = val;
+                       WRITE_ONCE(tp->keepalive_probes, val);
                break;
        case TCP_SYNCNT:
                if (val < 1 || val > MAX_TCP_SYNCNT)
                        err = -EINVAL;
                else
-                       icsk->icsk_syn_retries = val;
+                       WRITE_ONCE(icsk->icsk_syn_retries, val);
                break;
 
        case TCP_SAVE_SYN:
@@ -3583,18 +3585,18 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
 
        case TCP_LINGER2:
                if (val < 0)
-                       tp->linger2 = -1;
+                       WRITE_ONCE(tp->linger2, -1);
                else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
-                       tp->linger2 = TCP_FIN_TIMEOUT_MAX;
+                       WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
                else
-                       tp->linger2 = val * HZ;
+                       WRITE_ONCE(tp->linger2, val * HZ);
                break;
 
        case TCP_DEFER_ACCEPT:
                /* Translate value in seconds to number of retransmits */
-               icsk->icsk_accept_queue.rskq_defer_accept =
-                       secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
-                                       TCP_RTO_MAX / HZ);
+               WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+                          secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+                                          TCP_RTO_MAX / HZ));
                break;
 
        case TCP_WINDOW_CLAMP:
@@ -3618,7 +3620,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
                if (val < 0)
                        err = -EINVAL;
                else
-                       icsk->icsk_user_timeout = val;
+                       WRITE_ONCE(icsk->icsk_user_timeout, val);
                break;
 
        case TCP_FASTOPEN:
@@ -3656,13 +3658,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
                if (!tp->repair)
                        err = -EPERM;
                else
-                       tp->tsoffset = val - tcp_time_stamp_raw();
+                       WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
                break;
        case TCP_REPAIR_WINDOW:
                err = tcp_repair_set_window(tp, optval, optlen);
                break;
        case TCP_NOTSENT_LOWAT:
-               tp->notsent_lowat = val;
+               WRITE_ONCE(tp->notsent_lowat, val);
                sk->sk_write_space(sk);
                break;
        case TCP_INQ:
@@ -3674,7 +3676,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
        case TCP_TX_DELAY:
                if (val)
                        tcp_enable_tx_delay();
-               tp->tcp_tx_delay = val;
+               WRITE_ONCE(tp->tcp_tx_delay, val);
                break;
        default:
                err = -ENOPROTOOPT;
@@ -3991,17 +3993,18 @@ int do_tcp_getsockopt(struct sock *sk, int level,
                val = keepalive_probes(tp);
                break;
        case TCP_SYNCNT:
-               val = icsk->icsk_syn_retries ? :
+               val = READ_ONCE(icsk->icsk_syn_retries) ? :
                        READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
                break;
        case TCP_LINGER2:
-               val = tp->linger2;
+               val = READ_ONCE(tp->linger2);
                if (val >= 0)
                        val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
                break;
        case TCP_DEFER_ACCEPT:
-               val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
-                                     TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
+               val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
+               val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
+                                     TCP_RTO_MAX / HZ);
                break;
        case TCP_WINDOW_CLAMP:
                val = tp->window_clamp;
@@ -4138,11 +4141,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
                break;
 
        case TCP_USER_TIMEOUT:
-               val = icsk->icsk_user_timeout;
+               val = READ_ONCE(icsk->icsk_user_timeout);
                break;
 
        case TCP_FASTOPEN:
-               val = icsk->icsk_accept_queue.fastopenq.max_qlen;
+               val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
                break;
 
        case TCP_FASTOPEN_CONNECT:
@@ -4154,14 +4157,14 @@ int do_tcp_getsockopt(struct sock *sk, int level,
                break;
 
        case TCP_TX_DELAY:
-               val = tp->tcp_tx_delay;
+               val = READ_ONCE(tp->tcp_tx_delay);
                break;
 
        case TCP_TIMESTAMP:
-               val = tcp_time_stamp_raw() + tp->tsoffset;
+               val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
                break;
        case TCP_NOTSENT_LOWAT:
-               val = tp->notsent_lowat;
+               val = READ_ONCE(tp->notsent_lowat);
                break;
        case TCP_INQ:
                val = tp->recvmsg_inq;
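
The tcp.c hunks follow one rule: every field that do_tcp_getsockopt() now
loads with READ_ONCE() gets a matching WRITE_ONCE() in the setter, because
getsockopt() and diag code may read these fields without the socket lock the
setters hold. Note also the TCP_DEFER_ACCEPT getter, which reads the field
once into val before converting it, so the conversion runs on one consistent
snapshot. A condensed sketch of the pairing rule, with a hypothetical struct
and field:

    /* Sketch, not kernel code: both sides of a locklessly-read field
     * need annotations, or the compiler may tear or refetch the access.
     */
    struct example_sock {
            int linger2;
    };

    static void example_set(struct example_sock *es, int val)
    {
            WRITE_ONCE(es->linger2, val);   /* writer holds the socket lock */
    }

    static int example_get(const struct example_sock *es)
    {
            return READ_ONCE(es->linger2);  /* reader may hold no lock */
    }
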
index 45cc7f1..85e4953 100644 (file)
@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
 static bool tcp_fastopen_queue_check(struct sock *sk)
 {
        struct fastopen_queue *fastopenq;
+       int max_qlen;
 
        /* Make sure the listener has enabled fastopen, and we don't
         * exceed the max # of pending TFO requests allowed before trying
@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
         * temporarily vs a server not supporting Fast Open at all.
         */
        fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
-       if (fastopenq->max_qlen == 0)
+       max_qlen = READ_ONCE(fastopenq->max_qlen);
+       if (max_qlen == 0)
                return false;
 
-       if (fastopenq->qlen >= fastopenq->max_qlen) {
+       if (fastopenq->qlen >= max_qlen) {
                struct request_sock *req1;
                spin_lock(&fastopenq->lock);
                req1 = fastopenq->rskq_rst_head;
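
fastopenq->max_qlen can change underneath this function via
setsockopt(TCP_FASTOPEN), so it is read once into a local: with two raw loads,
the == 0 test and the >= comparison could observe two different values. A
condensed sketch of the snapshot idiom (struct and names are hypothetical):

    /* Sketch: take one snapshot, then make every decision from it. */
    struct fq { unsigned int qlen, max_qlen; };

    static bool fastopen_allowed(const struct fq *q)
    {
            unsigned int max_qlen = READ_ONCE(q->max_qlen); /* one load */

            if (max_qlen == 0)              /* TFO disabled on listener */
                    return false;
            return q->qlen < max_qlen;      /* same value as tested above */
    }
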
index fd365de..2dbdc26 100644 (file)
@@ -307,11 +307,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
                                                  inet->inet_daddr,
                                                  inet->inet_sport,
                                                  usin->sin_port));
-               tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
-                                                inet->inet_daddr);
+               WRITE_ONCE(tp->tsoffset,
+                          secure_tcp_ts_off(net, inet->inet_saddr,
+                                            inet->inet_daddr));
        }
 
-       inet->inet_id = get_random_u16();
+       atomic_set(&inet->inet_id, get_random_u16());
 
        if (tcp_fastopen_defer_connect(sk, &err))
                return err;
@@ -930,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
        ctl_sk = this_cpu_read(ipv4_tcp_sk);
        sock_net_set(ctl_sk, net);
        ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
-                          inet_twsk(sk)->tw_mark : sk->sk_mark;
+                          inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
        ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
-                          inet_twsk(sk)->tw_priority : sk->sk_priority;
+                          inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
        transmit_time = tcp_transmit_time(sk);
        ip_send_unicast_reply(ctl_sk,
                              skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -988,11 +989,12 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
-                       req->ts_recent,
+                       READ_ONCE(req->ts_recent),
                        0,
                        tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
                        inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
-                       ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
+                       ip_hdr(skb)->tos,
+                       READ_ONCE(tcp_rsk(req)->txhash));
 }
 
 /*
@@ -1594,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
        inet_csk(newsk)->icsk_ext_hdr_len = 0;
        if (inet_opt)
                inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
-       newinet->inet_id = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        /* Set ToS of the new socket based upon the value of incoming SYN.
         * ECT bits are set later in tcp_init_transfer().
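
inet->inet_id becomes an atomic_t in these hunks because the IP ID counter is
bumped from transmit paths that are not serialized by the socket lock;
atomic_set() seeds it, and the increment side can then reserve a whole range
of IDs per GSO packet atomically. A runnable userspace illustration of that
reservation, equivalent in effect to the kernel's
atomic_add_return(segs, ...) - segs:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static _Atomic uint16_t inet_id;

    static uint16_t next_ids(uint16_t segs)     /* reserve `segs` IDs */
    {
            return atomic_fetch_add(&inet_id, segs);   /* returns old value */
    }

    int main(void)
    {
            atomic_store(&inet_id, 42);         /* the atomic_set() above */
            printf("first id: %u\n", (unsigned)next_ids(3)); /* 42; next is 45 */
            return 0;
    }
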
index 82f4575..99ac5ef 100644 (file)
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
 
 struct tcp_metrics_block {
        struct tcp_metrics_block __rcu  *tcpm_next;
-       possible_net_t                  tcpm_net;
+       struct net                      *tcpm_net;
        struct inetpeer_addr            tcpm_saddr;
        struct inetpeer_addr            tcpm_daddr;
        unsigned long                   tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
        struct rcu_head                 rcu_head;
 };
 
-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
 {
-       return read_pnet(&tm->tcpm_net);
+       /* Paired with the WRITE_ONCE() in tcpm_new() */
+       return READ_ONCE(tm->tcpm_net);
 }
 
 static bool tcp_metric_locked(struct tcp_metrics_block *tm,
                              enum tcp_metric_index idx)
 {
-       return tm->tcpm_lock & (1 << idx);
+       /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+       return READ_ONCE(tm->tcpm_lock) & (1 << idx);
 }
 
-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
                          enum tcp_metric_index idx)
 {
-       return tm->tcpm_vals[idx];
+       /* Paired with WRITE_ONCE() in tcp_metric_set() */
+       return READ_ONCE(tm->tcpm_vals[idx]);
 }
 
 static void tcp_metric_set(struct tcp_metrics_block *tm,
                           enum tcp_metric_index idx,
                           u32 val)
 {
-       tm->tcpm_vals[idx] = val;
+       /* Paired with READ_ONCE() in tcp_metric_get() */
+       WRITE_ONCE(tm->tcpm_vals[idx], val);
 }
 
 static bool addr_same(const struct inetpeer_addr *a,
                      const struct inetpeer_addr *b)
 {
-       return inetpeer_addr_cmp(a, b) == 0;
+       return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
 }
 
 struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket        *tcp_metrics_hash __read_mostly;
 static unsigned int            tcp_metrics_hash_log __read_mostly;
 
 static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);
 
 static void tcpm_suck_dst(struct tcp_metrics_block *tm,
                          const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
        u32 msval;
        u32 val;
 
-       tm->tcpm_stamp = jiffies;
+       WRITE_ONCE(tm->tcpm_stamp, jiffies);
 
        val = 0;
        if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
                val |= 1 << TCP_METRIC_CWND;
        if (dst_metric_locked(dst, RTAX_REORDERING))
                val |= 1 << TCP_METRIC_REORDERING;
-       tm->tcpm_lock = val;
+       /* Paired with READ_ONCE() in tcp_metric_locked() */
+       WRITE_ONCE(tm->tcpm_lock, val);
 
        msval = dst_metric_raw(dst, RTAX_RTT);
-       tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+       tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
 
        msval = dst_metric_raw(dst, RTAX_RTTVAR);
-       tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
-       tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
-       tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
-       tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+       tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+       tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+                      dst_metric_raw(dst, RTAX_SSTHRESH));
+       tcp_metric_set(tm, TCP_METRIC_CWND,
+                      dst_metric_raw(dst, RTAX_CWND));
+       tcp_metric_set(tm, TCP_METRIC_REORDERING,
+                      dst_metric_raw(dst, RTAX_REORDERING));
        if (fastopen_clear) {
+               write_seqlock(&fastopen_seqlock);
                tm->tcpm_fastopen.mss = 0;
                tm->tcpm_fastopen.syn_loss = 0;
                tm->tcpm_fastopen.try_exp = 0;
                tm->tcpm_fastopen.cookie.exp = false;
                tm->tcpm_fastopen.cookie.len = 0;
+               write_sequnlock(&fastopen_seqlock);
        }
 }
 
 #define TCP_METRICS_TIMEOUT            (60 * 60 * HZ)
 
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+                            const struct dst_entry *dst)
 {
-       if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+       unsigned long limit;
+
+       if (!tm)
+               return;
+       limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+       if (unlikely(time_after(jiffies, limit)))
                tcpm_suck_dst(tm, dst, false);
 }
 
@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
                oldest = deref_locked(tcp_metrics_hash[hash].chain);
                for (tm = deref_locked(oldest->tcpm_next); tm;
                     tm = deref_locked(tm->tcpm_next)) {
-                       if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+                       if (time_before(READ_ONCE(tm->tcpm_stamp),
+                                       READ_ONCE(oldest->tcpm_stamp)))
                                oldest = tm;
                }
                tm = oldest;
        } else {
-               tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+               tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
                if (!tm)
                        goto out_unlock;
        }
-       write_pnet(&tm->tcpm_net, net);
+       /* Paired with the READ_ONCE() in tm_net() */
+       WRITE_ONCE(tm->tcpm_net, net);
+
        tm->tcpm_saddr = *saddr;
        tm->tcpm_daddr = *daddr;
 
-       tcpm_suck_dst(tm, dst, true);
+       tcpm_suck_dst(tm, dst, reclaim);
 
        if (likely(!reclaim)) {
                tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
                                               tp->reordering);
                }
        }
-       tm->tcpm_stamp = jiffies;
+       WRITE_ONCE(tm->tcpm_stamp, jiffies);
 out_unlock:
        rcu_read_unlock();
 }
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
        return ret;
 }
 
-static DEFINE_SEQLOCK(fastopen_seqlock);
-
 void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
                            struct tcp_fastopen_cookie *cookie)
 {
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
        }
 
        if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
-                         jiffies - tm->tcpm_stamp,
+                         jiffies - READ_ONCE(tm->tcpm_stamp),
                          TCP_METRICS_ATTR_PAD) < 0)
                goto nla_put_failure;
 
@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
                if (!nest)
                        goto nla_put_failure;
                for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
-                       u32 val = tm->tcpm_vals[i];
+                       u32 val = tcp_metric_get(tm, i);
 
                        if (!val)
                                continue;
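
Two things happen in the tcp_metrics.c hunks: fields read outside
tcp_metrics_lock (stamp, lock bits, the metric values) gain
READ_ONCE()/WRITE_ONCE() pairs, and fastopen_seqlock is moved up so
tcpm_suck_dst() can clear the fastopen state under write_seqlock(), preventing
a reader from seeing a half-reset cookie. The read side such a writer pairs
with looks like this sketch (real seqlock API, hypothetical state struct):

    /* Sketch of the seqlock read side paired with the write section
     * added above; struct fastopen_state is hypothetical.
     */
    static void fastopen_snapshot(struct fastopen_state *dst,
                                  const struct fastopen_state *src)
    {
            unsigned int seq;

            do {
                    seq = read_seqbegin(&fastopen_seqlock);
                    *dst = *src;    /* may overlap with a writer */
            } while (read_seqretry(&fastopen_seqlock, seq)); /* redo if so */
    }
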
index 04fc328..c8f2aa0 100644 (file)
@@ -528,7 +528,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
 
        newtp->lsndtime = tcp_jiffies32;
-       newsk->sk_txhash = treq->txhash;
+       newsk->sk_txhash = READ_ONCE(treq->txhash);
        newtp->total_retrans = req->num_retrans;
 
        tcp_init_xmit_timers(newsk);
@@ -555,7 +555,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
        newtp->max_window = newtp->snd_wnd;
 
        if (newtp->rx_opt.tstamp_ok) {
-               newtp->rx_opt.ts_recent = req->ts_recent;
+               newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
                newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
                newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
        } else {
@@ -619,7 +619,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
 
                if (tmp_opt.saw_tstamp) {
-                       tmp_opt.ts_recent = req->ts_recent;
+                       tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
                        if (tmp_opt.rcv_tsecr)
                                tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
                        /* We do not store true stamp, but it is not required,
@@ -758,8 +758,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 
        /* In sequence, PAWS is OK. */
 
+       /* TODO: We probably should defer ts_recent change once
+        * we take ownership of @req.
+        */
        if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
-               req->ts_recent = tmp_opt.rcv_tsval;
+               WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
 
        if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
                /* Truncate SYN, it is out of window starting
index 2cb39b6..51d8638 100644 (file)
@@ -878,7 +878,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
        if (likely(ireq->tstamp_ok)) {
                opts->options |= OPTION_TS;
                opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
-               opts->tsecr = req->ts_recent;
+               opts->tsecr = READ_ONCE(req->ts_recent);
                remaining -= TCPOLEN_TSTAMP_ALIGNED;
        }
        if (likely(ireq->sack_ok)) {
@@ -3660,7 +3660,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
        rcu_read_lock();
        md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
 #endif
-       skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+       skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
        /* bpf program will be interested in the tcp_flags */
        TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
        tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
@@ -4210,7 +4210,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
 
        /* Paired with WRITE_ONCE() in sock_setsockopt() */
        if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
-               tcp_rsk(req)->txhash = net_tx_rndhash();
+               WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash());
        res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
                                  NULL);
        if (!res) {
index 470f581..206418b 100644 (file)
@@ -591,7 +591,9 @@ out_reset_timer:
            tcp_stream_is_thin(tp) &&
            icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
                icsk->icsk_backoff = 0;
-               icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+               icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+                                      tcp_rto_min(sk),
+                                      TCP_RTO_MAX);
        } else if (sk->sk_state != TCP_SYN_SENT ||
                   icsk->icsk_backoff >
                   READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
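
min(__tcp_set_rto(tp), TCP_RTO_MAX) caps only the upper bound, so a thin
stream with a very small smoothed RTT could end up with an RTO below the
configured minimum; clamp() enforces both bounds. A runnable illustration with
made-up jiffies values:

    #include <stdio.h>

    #define min(a, b)           ((a) < (b) ? (a) : (b))
    #define clamp(v, lo, hi)    ((v) < (lo) ? (lo) : min((v), (hi)))

    int main(void)
    {
            int rto = 5, rto_min = 40, rto_max = 120000;

            printf("min:   %d\n", min(rto, rto_max));            /* 5: below floor */
            printf("clamp: %d\n", clamp(rto, rto_min, rto_max)); /* 40 */
            return 0;
    }
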
index 42a96b3..abfa860 100644 (file)
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 #include <net/udp_tunnel.h>
+#include <net/gro.h>
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6_stubs.h>
 #endif
@@ -555,10 +556,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
 {
        const struct iphdr *iph = ip_hdr(skb);
        struct net *net = dev_net(skb->dev);
+       int iif, sdif;
+
+       inet_get_iif_sdif(skb, &iif, &sdif);
 
        return __udp4_lib_lookup(net, iph->saddr, sport,
-                                iph->daddr, dport, inet_iif(skb),
-                                inet_sdif(skb), net->ipv4.udp_table, NULL);
+                                iph->daddr, dport, iif,
+                                sdif, net->ipv4.udp_table, NULL);
 }
 
 /* Must be called under rcu_read_lock().
index 75aa4de..0f46b3c 100644 (file)
@@ -274,13 +274,20 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
        __sum16 check;
        __be16 newlen;
 
-       if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
-               return __udp_gso_segment_list(gso_skb, features, is_ipv6);
-
        mss = skb_shinfo(gso_skb)->gso_size;
        if (gso_skb->len <= sizeof(*uh) + mss)
                return ERR_PTR(-EINVAL);
 
+       if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
+               /* Packet is from an untrusted source, reset gso_segs. */
+               skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
+                                                            mss);
+               return NULL;
+       }
+
+       if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+               return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
        skb_pull(gso_skb, sizeof(*uh));
 
        /* clear destructor to avoid skb_segment assigning it to tail */
@@ -388,8 +395,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
        if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                goto out;
 
-       if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
-           !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+       if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
                return __udp_gso_segment(skb, features, false);
 
        mss = skb_shinfo(skb)->gso_size;
@@ -603,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
 {
        const struct iphdr *iph = skb_gro_network_header(skb);
        struct net *net = dev_net(skb->dev);
+       int iif, sdif;
+
+       inet_get_iif_sdif(skb, &iif, &sdif);
 
        return __udp4_lib_lookup(net, iph->saddr, sport,
-                                iph->daddr, dport, inet_iif(skb),
-                                inet_sdif(skb), net->ipv4.udp_table, NULL);
+                                iph->daddr, dport, iif,
+                                sdif, net->ipv4.udp_table, NULL);
 }
 
 INDIRECT_CALLABLE_SCOPE
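
The GSO hunks move the NETIF_F_GSO_ROBUST handling from the callers into
__udp_gso_segment() itself and run it before the fraglist path: when the lower
layers can take the large packet as-is nothing is segmented, but packets from
untrusted sources (per the comment above) arrive with gso_segs unset and get
it recomputed from the payload length. The arithmetic, as a runnable check:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

    int main(void)
    {
            unsigned int len = 8 + 2900;    /* UDP header + payload */
            unsigned int mss = 1000;

            printf("gso_segs = %u\n", DIV_ROUND_UP(len - 8, mss)); /* 3 */
            return 0;
    }
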
index 658bfed..08d4b71 100644 (file)
@@ -152,7 +152,7 @@ config INET6_TUNNEL
        default n
 
 config IPV6_VTI
-tristate "Virtual (secure) IPv6: tunneling"
+       tristate "Virtual (secure) IPv6: tunneling"
        select IPV6_TUNNEL
        select NET_IP_TUNNEL
        select XFRM
index 5479da0..94cec20 100644 (file)
@@ -318,9 +318,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
 static void addrconf_mod_rs_timer(struct inet6_dev *idev,
                                  unsigned long when)
 {
-       if (!timer_pending(&idev->rs_timer))
+       if (!mod_timer(&idev->rs_timer, jiffies + when))
                in6_dev_hold(idev);
-       mod_timer(&idev->rs_timer, jiffies + when);
 }
 
 static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
@@ -2562,12 +2561,18 @@ static void manage_tempaddrs(struct inet6_dev *idev,
                        ipv6_ifa_notify(0, ift);
        }
 
-       if ((create || list_empty(&idev->tempaddr_list)) &&
-           idev->cnf.use_tempaddr > 0) {
+       /* Also create a temporary address if it's enabled but no temporary
+        * address currently exists.
+        * However, we get called with valid_lft == 0, prefered_lft == 0, create == false
+        * as part of cleanup (ie. deleting the mngtmpaddr).
+        * We don't want that to result in creating a new temporary ip address.
+        */
+       if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
+               create = true;
+
+       if (create && idev->cnf.use_tempaddr > 0) {
                /* When a new public address is created as described
                 * in [ADDRCONF], also create a new temporary address.
-                * Also create a temporary address if it's enabled but
-                * no temporary address currently exists.
                 */
                read_unlock_bh(&idev->lock);
                ipv6_create_tempaddr(ifp, false);
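
The addrconf_mod_rs_timer() fix closes a window: testing timer_pending() and
then calling mod_timer() as two steps let the timer fire in between and drop
its idev reference, unbalancing the refcount. mod_timer() returns 0 exactly
when the timer was inactive, so the return value makes check-and-arm a single
step. A condensed sketch around the real APIs:

    static void arm_rs_timer(struct inet6_dev *idev, unsigned long expires)
    {
            /* mod_timer() returns 0 iff the timer was not pending, i.e.
             * it did not already own a reference on idev.
             */
            if (!mod_timer(&idev->rs_timer, expires))
                    in6_dev_hold(idev);
    }
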
index 9edf1f4..65fa501 100644 (file)
@@ -424,7 +424,10 @@ static struct net_device *icmp6_dev(const struct sk_buff *skb)
        if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
                const struct rt6_info *rt6 = skb_rt6_info(skb);
 
-               if (rt6)
+               /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
+                * and ip6_null_entry could be set to skb if no route is found.
+                */
+               if (rt6 && rt6->rt6i_idev)
                        dev = rt6->rt6i_idev->dev;
        }
 
index da80974..070d87a 100644 (file)
@@ -955,7 +955,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                goto tx_err;
 
        if (skb->len > dev->mtu + dev->hard_header_len) {
-               pskb_trim(skb, dev->mtu + dev->hard_header_len);
+               if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+                       goto tx_err;
                truncate = true;
        }
 
index 10b2228..73c85d4 100644 (file)
@@ -568,12 +568,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
                    vti6_addr_conflict(t, ipv6_hdr(skb)))
                        goto tx_err;
 
-               xfrm_decode_session(skb, &fl, AF_INET6);
                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET6);
                break;
        case htons(ETH_P_IP):
-               xfrm_decode_session(skb, &fl, AF_INET);
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET);
                break;
        default:
                goto tx_err;
index cc3d5ad..67a3b8f 100644 (file)
@@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
                   And all this only to mangle msg->im6_msgtype and
                   to set msg->im6_mbz to "mbz" :-)
                 */
-               skb_push(skb, -skb_network_offset(pkt));
+               __skb_pull(skb, skb_network_offset(pkt));
 
                skb_push(skb, sizeof(*msg));
                skb_reset_transport_header(skb);
index 18634eb..a42be96 100644 (file)
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
 static inline int ndisc_is_useropt(const struct net_device *dev,
                                   struct nd_opt_hdr *opt)
 {
-       return opt->nd_opt_type == ND_OPT_RDNSS ||
+       return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+               opt->nd_opt_type == ND_OPT_RDNSS ||
                opt->nd_opt_type == ND_OPT_DNSSL ||
                opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
                opt->nd_opt_type == ND_OPT_PREF64 ||
index f804c11..c2c2918 100644 (file)
@@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
        ipcm6_init_sk(&ipc6, np);
        ipc6.sockc.tsflags = sk->sk_tsflags;
-       ipc6.sockc.mark = sk->sk_mark;
+       ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
        fl6.flowi6_oif = oif;
 
index ac1cef0..49381f3 100644 (file)
@@ -614,7 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
        skb_reserve(skb, hlen);
 
        skb->protocol = htons(ETH_P_IPV6);
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = sockc->mark;
        skb->tstamp = sockc->transmit_time;
 
@@ -774,12 +774,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
         */
        memset(&fl6, 0, sizeof(fl6));
 
-       fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
        fl6.flowi6_uid = sk->sk_uid;
 
        ipcm6_init(&ipc6);
        ipc6.sockc.tsflags = sk->sk_tsflags;
-       ipc6.sockc.mark = sk->sk_mark;
+       ipc6.sockc.mark = fl6.flowi6_mark;
 
        if (sin6) {
                if (addr_len < SIN6_LEN_RFC2133)
index 64e873f..56a5558 100644 (file)
@@ -2951,7 +2951,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
        if (!oif && skb->dev)
                oif = l3mdev_master_ifindex(skb->dev);
 
-       ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
+       ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
+                       sk->sk_uid);
 
        dst = __sk_dst_get(sk);
        if (!dst || !dst->obsolete ||
@@ -3172,8 +3173,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
 
 void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
 {
-       ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
-                    sk->sk_uid);
+       ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+                    READ_ONCE(sk->sk_mark), sk->sk_uid);
 }
 EXPORT_SYMBOL_GPL(ip6_sk_redirect);
 
index 40dd92a..6e86721 100644 (file)
@@ -564,8 +564,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
                opt = ireq->ipv6_opt;
                if (!opt)
                        opt = rcu_dereference(np->opt);
-               err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
-                              tclass, sk->sk_priority);
+               err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
+                              opt, tclass, sk->sk_priority);
                rcu_read_unlock();
                err = net_xmit_eval(err);
        }
@@ -939,7 +939,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
                if (sk->sk_state == TCP_TIME_WAIT)
                        mark = inet_twsk(sk)->tw_mark;
                else
-                       mark = sk->sk_mark;
+                       mark = READ_ONCE(sk->sk_mark);
                skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
        }
        if (txhash) {
@@ -1126,10 +1126,11 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
                        tcp_rsk(req)->rcv_nxt,
                        req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
                        tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
-                       req->ts_recent, sk->sk_bound_dev_if,
+                       READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
                        tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
-                       ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
-                       tcp_rsk(req)->txhash);
+                       ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+                       READ_ONCE(sk->sk_priority),
+                       READ_ONCE(tcp_rsk(req)->txhash));
 }
 
 
index 317b01c..f787e6b 100644 (file)
 #include <net/tcp_states.h>
 #include <net/ip6_checksum.h>
 #include <net/ip6_tunnel.h>
+#include <trace/events/udp.h>
 #include <net/xfrm.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
 #include <net/busy_poll.h>
 #include <net/sock_reuseport.h>
+#include <net/gro.h>
 
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
@@ -90,7 +92,7 @@ static u32 udp6_ehashfn(const struct net *net,
        fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
 
        return __inet6_ehashfn(lhash, lport, fhash, fport,
-                              udp_ipv6_hash_secret + net_hash_mix(net));
+                              udp6_ehash_secret + net_hash_mix(net));
 }
 
 int udp_v6_get_port(struct sock *sk, unsigned short snum)
@@ -299,10 +301,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 {
        const struct ipv6hdr *iph = ipv6_hdr(skb);
        struct net *net = dev_net(skb->dev);
+       int iif, sdif;
+
+       inet6_get_iif_sdif(skb, &iif, &sdif);
 
        return __udp6_lib_lookup(net, &iph->saddr, sport,
-                                &iph->daddr, dport, inet6_iif(skb),
-                                inet6_sdif(skb), net->ipv4.udp_table, NULL);
+                                &iph->daddr, dport, iif,
+                                sdif, net->ipv4.udp_table, NULL);
 }
 
 /* Must be called under rcu_read_lock().
@@ -623,7 +628,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
        if (type == NDISC_REDIRECT) {
                if (tunnel) {
                        ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
-                                    sk->sk_mark, sk->sk_uid);
+                                    READ_ONCE(sk->sk_mark), sk->sk_uid);
                } else {
                        ip6_sk_redirect(skb, sk);
                }
@@ -680,6 +685,7 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
                }
                UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
                kfree_skb_reason(skb, drop_reason);
+               trace_udp_fail_queue_rcv_skb(rc, sk);
                return -1;
        }
 
@@ -1354,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        ipcm6_init(&ipc6);
        ipc6.gso_size = READ_ONCE(up->gso_size);
        ipc6.sockc.tsflags = sk->sk_tsflags;
-       ipc6.sockc.mark = sk->sk_mark;
+       ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
 
        /* destination address check */
        if (sin6) {
index ad3b872..6b95ba2 100644 (file)
@@ -43,8 +43,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
                if (!pskb_may_pull(skb, sizeof(struct udphdr)))
                        goto out;
 
-               if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
-                   !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+               if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
                        return __udp_gso_segment(skb, features, true);
 
                mss = skb_shinfo(skb)->gso_size;
@@ -119,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
 {
        const struct ipv6hdr *iph = skb_gro_network_header(skb);
        struct net *net = dev_net(skb->dev);
+       int iif, sdif;
+
+       inet6_get_iif_sdif(skb, &iif, &sdif);
 
        return __udp6_lib_lookup(net, &iph->saddr, sport,
-                                &iph->daddr, dport, inet6_iif(skb),
-                                inet6_sdif(skb), net->ipv4.udp_table, NULL);
+                                &iph->daddr, dport, iif,
+                                sdif, net->ipv4.udp_table, NULL);
 }
 
 INDIRECT_CALLABLE_SCOPE
index ede3c6a..b4ea4cf 100644 (file)
@@ -1848,9 +1848,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
        if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
                struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
 
-               if ((xfilter->sadb_x_filter_splen >=
+               if ((xfilter->sadb_x_filter_splen >
                        (sizeof(xfrm_address_t) << 3)) ||
-                   (xfilter->sadb_x_filter_dplen >=
+                   (xfilter->sadb_x_filter_dplen >
                        (sizeof(xfrm_address_t) << 3))) {
                        mutex_unlock(&pfk->dump_lock);
                        return -EINVAL;
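
sizeof(xfrm_address_t) << 3 is 128, the address width in bits, and a prefix
length of exactly 128 (an IPv6 host match) is legal; the old >= comparison
rejected it, an off-by-one. Runnable boundary check:

    #include <stdio.h>

    int main(void)
    {
            unsigned int max_bits = 16u << 3; /* sizeof(xfrm_address_t) << 3 */
            unsigned int splen = 128;         /* IPv6 host prefix */

            printf("old (>=): %s\n", splen >= max_bits ? "rejected" : "ok");
            printf("new (>):  %s\n", splen >  max_bits ? "rejected" : "ok");
            return 0;
    }
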
index b1623f9..ff78217 100644 (file)
@@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
        /* Get and verify the address */
        memset(&fl6, 0, sizeof(fl6));
 
-       fl6.flowi6_mark = sk->sk_mark;
+       fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
        fl6.flowi6_uid = sk->sk_uid;
 
        ipcm6_init(&ipc6);
index 57c35c9..9b06c38 100644 (file)
@@ -402,7 +402,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
                memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN);
                laddr.lsap = addr->sllc_sap;
                rc = -EADDRINUSE; /* mac + sap clash. */
-               ask = llc_lookup_established(sap, &daddr, &laddr);
+               ask = llc_lookup_established(sap, &daddr, &laddr, &init_net);
                if (ask) {
                        sock_put(ask);
                        goto out_put;
index 912aa9b..d037009 100644 (file)
@@ -453,11 +453,13 @@ static int llc_exec_conn_trans_actions(struct sock *sk,
 static inline bool llc_estab_match(const struct llc_sap *sap,
                                   const struct llc_addr *daddr,
                                   const struct llc_addr *laddr,
-                                  const struct sock *sk)
+                                  const struct sock *sk,
+                                  const struct net *net)
 {
        struct llc_sock *llc = llc_sk(sk);
 
-       return llc->laddr.lsap == laddr->lsap &&
+       return net_eq(sock_net(sk), net) &&
+               llc->laddr.lsap == laddr->lsap &&
                llc->daddr.lsap == daddr->lsap &&
                ether_addr_equal(llc->laddr.mac, laddr->mac) &&
                ether_addr_equal(llc->daddr.mac, daddr->mac);
@@ -468,6 +470,7 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
  *     @sap: SAP
  *     @daddr: address of remote LLC (MAC + SAP)
  *     @laddr: address of local LLC (MAC + SAP)
+ *     @net: netns to look up a socket in
  *
  *     Search connection list of the SAP and finds connection using the remote
  *     mac, remote sap, local mac, and local sap. Returns pointer for
@@ -476,7 +479,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
  */
 static struct sock *__llc_lookup_established(struct llc_sap *sap,
                                             struct llc_addr *daddr,
-                                            struct llc_addr *laddr)
+                                            struct llc_addr *laddr,
+                                            const struct net *net)
 {
        struct sock *rc;
        struct hlist_nulls_node *node;
@@ -486,12 +490,12 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
        rcu_read_lock();
 again:
        sk_nulls_for_each_rcu(rc, node, laddr_hb) {
-               if (llc_estab_match(sap, daddr, laddr, rc)) {
+               if (llc_estab_match(sap, daddr, laddr, rc, net)) {
                        /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
                        if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
                                goto again;
                        if (unlikely(llc_sk(rc)->sap != sap ||
-                                    !llc_estab_match(sap, daddr, laddr, rc))) {
+                                    !llc_estab_match(sap, daddr, laddr, rc, net))) {
                                sock_put(rc);
                                continue;
                        }
@@ -513,29 +517,33 @@ found:
 
 struct sock *llc_lookup_established(struct llc_sap *sap,
                                    struct llc_addr *daddr,
-                                   struct llc_addr *laddr)
+                                   struct llc_addr *laddr,
+                                   const struct net *net)
 {
        struct sock *sk;
 
        local_bh_disable();
-       sk = __llc_lookup_established(sap, daddr, laddr);
+       sk = __llc_lookup_established(sap, daddr, laddr, net);
        local_bh_enable();
        return sk;
 }
 
 static inline bool llc_listener_match(const struct llc_sap *sap,
                                      const struct llc_addr *laddr,
-                                     const struct sock *sk)
+                                     const struct sock *sk,
+                                     const struct net *net)
 {
        struct llc_sock *llc = llc_sk(sk);
 
-       return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
+       return net_eq(sock_net(sk), net) &&
+               sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
                llc->laddr.lsap == laddr->lsap &&
                ether_addr_equal(llc->laddr.mac, laddr->mac);
 }
 
 static struct sock *__llc_lookup_listener(struct llc_sap *sap,
-                                         struct llc_addr *laddr)
+                                         struct llc_addr *laddr,
+                                         const struct net *net)
 {
        struct sock *rc;
        struct hlist_nulls_node *node;
@@ -545,12 +553,12 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
        rcu_read_lock();
 again:
        sk_nulls_for_each_rcu(rc, node, laddr_hb) {
-               if (llc_listener_match(sap, laddr, rc)) {
+               if (llc_listener_match(sap, laddr, rc, net)) {
                        /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
                        if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
                                goto again;
                        if (unlikely(llc_sk(rc)->sap != sap ||
-                                    !llc_listener_match(sap, laddr, rc))) {
+                                    !llc_listener_match(sap, laddr, rc, net))) {
                                sock_put(rc);
                                continue;
                        }
@@ -574,6 +582,7 @@ found:
  *     llc_lookup_listener - Finds listener for local MAC + SAP
  *     @sap: SAP
  *     @laddr: address of local LLC (MAC + SAP)
+ *     @net: netns to look up a socket in
  *
  *     Search connection list of the SAP and finds connection listening on
  *     local mac, and local sap. Returns pointer for parent socket found,
@@ -581,24 +590,26 @@ found:
  *     Caller has to make sure local_bh is disabled.
  */
 static struct sock *llc_lookup_listener(struct llc_sap *sap,
-                                       struct llc_addr *laddr)
+                                       struct llc_addr *laddr,
+                                       const struct net *net)
 {
+       struct sock *rc = __llc_lookup_listener(sap, laddr, net);
        static struct llc_addr null_addr;
-       struct sock *rc = __llc_lookup_listener(sap, laddr);
 
        if (!rc)
-               rc = __llc_lookup_listener(sap, &null_addr);
+               rc = __llc_lookup_listener(sap, &null_addr, net);
 
        return rc;
 }
 
 static struct sock *__llc_lookup(struct llc_sap *sap,
                                 struct llc_addr *daddr,
-                                struct llc_addr *laddr)
+                                struct llc_addr *laddr,
+                                const struct net *net)
 {
-       struct sock *sk = __llc_lookup_established(sap, daddr, laddr);
+       struct sock *sk = __llc_lookup_established(sap, daddr, laddr, net);
 
-       return sk ? : llc_lookup_listener(sap, laddr);
+       return sk ? : llc_lookup_listener(sap, laddr, net);
 }
 
 /**
@@ -776,7 +787,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
        llc_pdu_decode_da(skb, daddr.mac);
        llc_pdu_decode_dsap(skb, &daddr.lsap);
 
-       sk = __llc_lookup(sap, &saddr, &daddr);
+       sk = __llc_lookup(sap, &saddr, &daddr, dev_net(skb->dev));
        if (!sk)
                goto drop;
 
index dde9bf0..58a5f41 100644 (file)
@@ -92,7 +92,7 @@ int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap)
        daddr.lsap = dsap;
        memcpy(daddr.mac, dmac, sizeof(daddr.mac));
        memcpy(laddr.mac, lmac, sizeof(laddr.mac));
-       existing = llc_lookup_established(llc->sap, &daddr, &laddr);
+       existing = llc_lookup_established(llc->sap, &daddr, &laddr, sock_net(sk));
        if (existing) {
                if (existing->sk_state == TCP_ESTABLISHED) {
                        sk = existing;
index c309b72..7cac441 100644 (file)
@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
        void (*sta_handler)(struct sk_buff *skb);
        void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
 
-       if (!net_eq(dev_net(dev), &init_net))
-               goto drop;
-
        /*
         * When the interface is in promisc. mode, drop all the crap that it
         * receives, do not try to analyse it.
index 6805ce4..116c0e4 100644 (file)
@@ -294,25 +294,29 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
 
 static inline bool llc_dgram_match(const struct llc_sap *sap,
                                   const struct llc_addr *laddr,
-                                  const struct sock *sk)
+                                  const struct sock *sk,
+                                  const struct net *net)
 {
        struct llc_sock *llc = llc_sk(sk);
 
        return sk->sk_type == SOCK_DGRAM &&
-              llc->laddr.lsap == laddr->lsap &&
-              ether_addr_equal(llc->laddr.mac, laddr->mac);
+              net_eq(sock_net(sk), net) &&
+              llc->laddr.lsap == laddr->lsap &&
+              ether_addr_equal(llc->laddr.mac, laddr->mac);
 }
 
 /**
  *     llc_lookup_dgram - Finds dgram socket for the local sap/mac
  *     @sap: SAP
  *     @laddr: address of local LLC (MAC + SAP)
+ *     @net: netns to look up a socket in
  *
  *     Search socket list of the SAP and finds connection using the local
  *     mac, and local sap. Returns pointer for socket found, %NULL otherwise.
  */
 static struct sock *llc_lookup_dgram(struct llc_sap *sap,
-                                    const struct llc_addr *laddr)
+                                    const struct llc_addr *laddr,
+                                    const struct net *net)
 {
        struct sock *rc;
        struct hlist_nulls_node *node;
@@ -322,12 +326,12 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
        rcu_read_lock_bh();
 again:
        sk_nulls_for_each_rcu(rc, node, laddr_hb) {
-               if (llc_dgram_match(sap, laddr, rc)) {
+               if (llc_dgram_match(sap, laddr, rc, net)) {
                        /* Extra checks required by SLAB_TYPESAFE_BY_RCU */
                        if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
                                goto again;
                        if (unlikely(llc_sk(rc)->sap != sap ||
-                                    !llc_dgram_match(sap, laddr, rc))) {
+                                    !llc_dgram_match(sap, laddr, rc, net))) {
                                sock_put(rc);
                                continue;
                        }
@@ -429,7 +433,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
                llc_sap_mcast(sap, &laddr, skb);
                kfree_skb(skb);
        } else {
-               struct sock *sk = llc_lookup_dgram(sap, &laddr);
+               struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev));
                if (sk) {
                        llc_sap_rcv(sap, skb, sk);
                        sock_put(sk);
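
Taken together, the LLC hunks thread a struct net * through every socket
lookup and drop the init_net-only filter in llc_rcv(), making the lookups
namespace-aware. Because these tables use SLAB_TYPESAFE_BY_RCU, each match is
verified again after taking a reference, and the netns test has to live inside
the match helper or a recycled socket from another namespace could slip
through. The idiom, condensed from the loops above; match() stands in for
llc_estab_match()/llc_dgram_match():

    sk_nulls_for_each_rcu(sk, node, head) {
            if (match(sap, laddr, sk, net)) {
                    if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
                            goto again;     /* entry was being freed */
                    if (unlikely(!match(sap, laddr, sk, net))) {
                            sock_put(sk);   /* slab recycled it: not ours */
                            continue;
                    }
                    goto found;
            }
    }
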
index 4f707d2..0af2599 100644 (file)
@@ -1083,7 +1083,8 @@ static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
        struct sk_buff *tail = skb_peek_tail(frames);
        struct ieee80211_rx_status *status;
 
-       if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
+       if (tid_agg_rx->reorder_buf_filtered &&
+           tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
                return true;
 
        if (!tail)
@@ -1124,7 +1125,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
        }
 
 no_frame:
-       tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
+       if (tid_agg_rx->reorder_buf_filtered)
+               tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
        tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
 }
 
@@ -4264,6 +4266,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
                                          u16 ssn, u64 filtered,
                                          u16 received_mpdus)
 {
+       struct ieee80211_local *local;
        struct sta_info *sta;
        struct tid_ampdu_rx *tid_agg_rx;
        struct sk_buff_head frames;
@@ -4281,6 +4284,11 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
 
        sta = container_of(pubsta, struct sta_info, sta);
 
+       local = sta->sdata->local;
+       WARN_ONCE(local->hw.max_rx_aggregation_subframes > 64,
+                 "RX BA marker can't support max_rx_aggregation_subframes %u > 64\n",
+                 local->hw.max_rx_aggregation_subframes);
+
        if (!ieee80211_rx_data_set_sta(&rx, sta, -1))
                return;
 
index 3613489..d806585 100644 (file)
@@ -2335,7 +2335,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
        lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 
-       if (flags & MPTCP_CF_FASTCLOSE) {
+       if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
                /* be sure to force the tcp_disconnect() path,
                 * to generate the egress reset
                 */
@@ -3328,7 +3328,7 @@ static void mptcp_release_cb(struct sock *sk)
 
        if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
                __mptcp_clean_una_wakeup(sk);
-       if (unlikely(&msk->cb_flags)) {
+       if (unlikely(msk->cb_flags)) {
                /* be sure to set the current sk state before tacking actions
                 * depending on sk_state, that is processing MPTCP_ERROR_REPORT
                 */
@@ -3723,10 +3723,9 @@ static int mptcp_listen(struct socket *sock, int backlog)
        if (!err) {
                sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
                mptcp_copy_inaddrs(sk, ssock->sk);
+               mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
        }
 
-       mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
 unlock:
        release_sock(sk);
        return err;
index 37fbe22..ba2a873 100644 (file)
@@ -325,7 +325,6 @@ struct mptcp_sock {
        u32             subflow_id;
        u32             setsockopt_seq;
        char            ca_name[TCP_CA_NAME_MAX];
-       struct mptcp_sock       *dl_next;
 };
 
 #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
index 63f7a09..a3f1fe8 100644 (file)
@@ -103,7 +103,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
                        break;
                case SO_MARK:
                        if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
-                               ssk->sk_mark = sk->sk_mark;
+                               WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
                                sk_dst_reset(ssk);
                        }
                        break;
index 9ee3b7a..94ae7dd 100644 (file)
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
 void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
 {
        struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
-       struct mptcp_sock *msk, *next, *head = NULL;
-       struct request_sock *req;
-       struct sock *sk;
+       struct request_sock *req, *head, *tail;
+       struct mptcp_subflow_context *subflow;
+       struct sock *sk, *ssk;
 
-       /* build a list of all unaccepted mptcp sockets */
+       /* Due to lock dependencies no relevant lock can be acquired under rskq_lock.
+        * Splice the req list, so that accept() can not reach the pending ssk after
+        * the listener socket is released below.
+        */
        spin_lock_bh(&queue->rskq_lock);
-       for (req = queue->rskq_accept_head; req; req = req->dl_next) {
-               struct mptcp_subflow_context *subflow;
-               struct sock *ssk = req->sk;
+       head = queue->rskq_accept_head;
+       tail = queue->rskq_accept_tail;
+       queue->rskq_accept_head = NULL;
+       queue->rskq_accept_tail = NULL;
+       spin_unlock_bh(&queue->rskq_lock);
+
+       if (!head)
+               return;
 
+       /* can't acquire the msk socket lock under the subflow one,
+        * or will cause ABBA deadlock
+        */
+       release_sock(listener_ssk);
+
+       for (req = head; req; req = req->dl_next) {
+               ssk = req->sk;
                if (!sk_is_mptcp(ssk))
                        continue;
 
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
                if (!subflow || !subflow->conn)
                        continue;
 
-               /* skip if already in list */
                sk = subflow->conn;
-               msk = mptcp_sk(sk);
-               if (msk->dl_next || msk == head)
-                       continue;
-
                sock_hold(sk);
-               msk->dl_next = head;
-               head = msk;
-       }
-       spin_unlock_bh(&queue->rskq_lock);
-       if (!head)
-               return;
-
-       /* can't acquire the msk socket lock under the subflow one,
-        * or will cause ABBA deadlock
-        */
-       release_sock(listener_ssk);
-
-       for (msk = head; msk; msk = next) {
-               sk = (struct sock *)msk;
 
                lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-               next = msk->dl_next;
-               msk->dl_next = NULL;
-
                __mptcp_unaccepted_force_close(sk);
                release_sock(sk);
 
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
 
        /* we are still under the listener msk socket lock */
        lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+       /* restore the listener queue, to let the TCP code clean it up */
+       spin_lock_bh(&queue->rskq_lock);
+       WARN_ON_ONCE(queue->rskq_accept_head);
+       queue->rskq_accept_head = head;
+       queue->rskq_accept_tail = tail;
+       spin_unlock_bh(&queue->rskq_lock);
 }
 
 static int subflow_ulp_init(struct sock *sk)
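
The rework above drops the dl_next side list (see the mptcp_sock hunk earlier)
in favour of splicing the listener's whole accept queue out under rskq_lock,
releasing the listener subflow lock, force-closing each unaccepted MPTCP
socket with only the msk lock held, and finally splicing the queue back so
normal TCP teardown frees the requests. That preserves a single msk-then-
subflow lock order and never holds rskq_lock across a socket lock. The splice
step, condensed:

    /* Condensed from the hunk above: detach, work unlocked, reattach. */
    spin_lock_bh(&queue->rskq_lock);
    head = queue->rskq_accept_head;         /* take the whole list */
    tail = queue->rskq_accept_tail;
    queue->rskq_accept_head = NULL;
    queue->rskq_accept_tail = NULL;
    spin_unlock_bh(&queue->rskq_lock);

    /* ... close the unaccepted msk sockets reachable from head ... */

    spin_lock_bh(&queue->rskq_lock);
    queue->rskq_accept_head = head;         /* let TCP clean it up */
    queue->rskq_accept_tail = tail;
    spin_unlock_bh(&queue->rskq_lock);
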
index 62606fb..4bb0d90 100644 (file)
@@ -1876,6 +1876,7 @@ static int
 proc_do_sync_threshold(struct ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
+       struct netns_ipvs *ipvs = table->extra2;
        int *valp = table->data;
        int val[2];
        int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
                .mode = table->mode,
        };
 
+       mutex_lock(&ipvs->sync_mutex);
        memcpy(val, valp, sizeof(val));
        rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
        if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
                else
                        memcpy(valp, val, sizeof(val));
        }
+       mutex_unlock(&ipvs->sync_mutex);
        return rc;
 }
 
@@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
        ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
        ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
        tbl[idx].data = &ipvs->sysctl_sync_threshold;
+       tbl[idx].extra2 = ipvs;
        tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
        ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
        tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
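
The extra2 hookup in the last hunk is what lets the handler find its per-netns context: ctl_table has no private-data argument for proc handlers, so the spare extra2 pointer is commonly used as a cookie. With sync_mutex held around the memcpy/proc_dointvec/memcpy sequence, the two threshold values are always published as a consistent pair. A sketch of the same shape for a hypothetical two-value sysctl (struct my_ctx and the sanity check are made up; proc_dointvec and the ctl_table fields are real):

	static int proc_do_pair(struct ctl_table *table, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
	{
		struct my_ctx *ctx = table->extra2;	/* stashed at registration */
		int *valp = table->data;
		int val[2];
		struct ctl_table tmp = {
			.data = &val,
			.maxlen = sizeof(val),
			.mode = table->mode,
		};
		int rc;

		/* Serialize writers so the pair is read and written whole. */
		mutex_lock(&ctx->lock);
		memcpy(val, valp, sizeof(val));
		rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
		if (write && rc == 0) {
			if (val[0] < 0 || val[1] < val[0])	/* sanity check */
				rc = -EINVAL;
			else
				memcpy(valp, val, sizeof(val));
		}
		mutex_unlock(&ctx->lock);
		return rc;
	}
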
index d119f1d..9923931 100644
@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
                              unsigned int zoneid,
                              const struct net *net)
 {
-       u64 a, b, c, d;
+       siphash_key_t key;
 
        get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
 
-       /* The direction must be ignored, handle usable tuplehash members manually */
-       a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
-       b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
+       key = nf_conntrack_hash_rnd;
 
-       c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
-       c |= tuple->dst.protonum;
+       key.key[0] ^= zoneid;
+       key.key[1] ^= net_hash_mix(net);
 
-       d = (u64)zoneid << 32 | net_hash_mix(net);
-
-       /* IPv4: u3.all[1,2,3] == 0 */
-       c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
-       d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
-
-       return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
+       return siphash((void *)tuple,
+                       offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
+                       &key);
 }
 
 static u32 scale_hash(u32 hash)
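
The replacement above stops folding tuple fields into four u64s by hand and instead runs keyed siphash over the raw tuple bytes up to __nfct_hash_offsetend, a marker member that appears to sit just before the direction field (which must not influence the hash). The zone id and netns are mixed into the key rather than the message, so identical tuples still hash differently per zone and namespace. The general idiom, as a sketch with a hypothetical key struct; note that any padding inside the hashed span has to be deterministically zeroed, which the conntrack tuple setup is expected to guarantee:

	#include <linux/siphash.h>
	#include <linux/stddef.h>	/* offsetofend() */

	struct flow_key {
		__be32	saddr;
		__be32	daddr;
		__be16	sport;
		__be16	dport;
		u8	proto;
		/* members from here on are not part of the hash */
		u8	dir;
	};

	static siphash_key_t flow_hash_key;	/* init with get_random_once() */

	static u32 flow_hash(const struct flow_key *k, u32 ctx_id)
	{
		siphash_key_t key = flow_hash_key;

		key.key[0] ^= ctx_id;	/* e.g. zone id or net_hash_mix() */

		/* Hash every byte of the prefix, ending after 'proto'. */
		return (u32)siphash(k, offsetofend(struct flow_key, proto), &key);
	}
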
index 0c4db2f..f22691f 100644
@@ -360,6 +360,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
        BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
        BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
 
+       if (!nf_ct_helper_hash)
+               return -ENOENT;
+
        if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
                return -EINVAL;
 
@@ -515,4 +518,5 @@ int nf_conntrack_helper_init(void)
 void nf_conntrack_helper_fini(void)
 {
        kvfree(nf_ct_helper_hash);
+       nf_ct_helper_hash = NULL;
 }
index ad6f0ca..af369e6 100644
@@ -205,6 +205,8 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
                            enum ip_conntrack_info ctinfo,
                            const struct nf_hook_state *state)
 {
+       unsigned long status;
+
        if (!nf_ct_is_confirmed(ct)) {
                unsigned int *timeouts = nf_ct_timeout_lookup(ct);
 
@@ -217,11 +219,17 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
                ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
        }
 
+       status = READ_ONCE(ct->status);
        /* If we've seen traffic both ways, this is a GRE connection.
         * Extend timeout. */
-       if (ct->status & IPS_SEEN_REPLY) {
+       if (status & IPS_SEEN_REPLY) {
                nf_ct_refresh_acct(ct, ctinfo, skb,
                                   ct->proto.gre.stream_timeout);
+
+               /* never set ASSURED for IPS_NAT_CLASH, they time out soon */
+               if (unlikely((status & IPS_NAT_CLASH)))
+                       return NF_ACCEPT;
+
                /* Also, more likely to be important, and not a probe. */
                if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
                        nf_conntrack_event_cache(IPCT_ASSURED, ct);
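
Two details in this hunk: ct->status is sampled once into a local with READ_ONCE(), so the SEEN_REPLY and NAT_CLASH tests act on the same snapshot even while other CPUs set bits concurrently, and NAT-clash entries return early so they are never promoted to ASSURED (assured entries resist early eviction, which would defeat the point of short-lived clash duplicates). The hazard the snapshot avoids, in miniature:

	/* Racy: two independent loads of ct->status may disagree; a bit
	 * set in between makes the two checks act on different values.
	 */
	if (ct->status & IPS_SEEN_REPLY) {
		if (ct->status & IPS_NAT_CLASH)		/* second load */
			return NF_ACCEPT;
	}

	/* Fixed: one load, every test agrees on the same snapshot. */
	unsigned long status = READ_ONCE(ct->status);

The write side stays a test_and_set_bit() on ct->status itself, which is atomic, so only one CPU ever generates the IPCT_ASSURED event.
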
index 91eacc9..b6bcc8f 100644
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
        [SCTP_CONNTRACK_COOKIE_WAIT]            = 3 SECS,
        [SCTP_CONNTRACK_COOKIE_ECHOED]          = 3 SECS,
        [SCTP_CONNTRACK_ESTABLISHED]            = 210 SECS,
-       [SCTP_CONNTRACK_SHUTDOWN_SENT]          = 300 SECS / 1000,
-       [SCTP_CONNTRACK_SHUTDOWN_RECD]          = 300 SECS / 1000,
+       [SCTP_CONNTRACK_SHUTDOWN_SENT]          = 3 SECS,
+       [SCTP_CONNTRACK_SHUTDOWN_RECD]          = 3 SECS,
        [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]      = 3 SECS,
        [SCTP_CONNTRACK_HEARTBEAT_SENT]         = 30 SECS,
 };
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
        {
 /*     ORIGINAL        */
 /*                  sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
-/* init         */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init         */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
 /* init_ack     */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
 /* abort        */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
 /* shutdown     */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
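
The timeout change in the first hunk is unit arithmetic: in this file SECS expands to "* HZ" (assuming the usual "#define SECS * HZ" at the top of nf_conntrack_proto_sctp.c), so the old expression was off by a factor of a thousand:

	/* Old: 300 SECS / 1000  ==  (300 * HZ) / 1000  ==  0.3 * HZ  -> 300 ms */
	/* New:   3 SECS         ==    (3 * HZ)                       -> 3 s    */

The second hunk, read straight off the state table, makes an ORIGINAL-direction INIT chunk seen in SHUTDOWN_SENT (sSS) or SHUTDOWN_RECD (sSR) move the tracking state to CLOSED (sCL) instead of staying in the shutdown state, presumably so a restarting peer can re-establish a fresh association.
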
index 9573a8f..eb8b116 100644
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
 static LIST_HEAD(nf_tables_objects);
 static LIST_HEAD(nf_tables_flowtables);
 static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
 static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
 
 enum {
        NFT_VALIDATE_SKIP       = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
 static void nf_tables_trans_destroy_work(struct work_struct *w);
 static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
 
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
 static void nft_ctx_init(struct nft_ctx *ctx,
                         struct net *net,
                         const struct sk_buff *skb,
@@ -253,8 +258,10 @@ int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
        if (chain->bound)
                return -EBUSY;
 
+       if (!nft_use_inc(&chain->use))
+               return -EMFILE;
+
        chain->bound = true;
-       chain->use++;
        nft_chain_trans_bind(ctx, chain);
 
        return 0;
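
nft_use_inc() and nft_use_dec(), used throughout the rest of this patch, are small helpers added to nf_tables.h by this series (their definition is not part of this excerpt). Their contract, roughly, replaces the scattered "use == UINT_MAX" checks with one saturating increment that fails instead of wrapping, which callers translate into -EMFILE. A sketch of the semantics visible from the call sites, not a verbatim copy of the header:

	static inline bool nft_use_inc(u32 *use)
	{
		if (*use == UINT_MAX)	/* would overflow: refuse */
			return false;
		(*use)++;
		return true;
	}

	static inline void nft_use_dec(u32 *use)
	{
		WARN_ON_ONCE((*use)-- == 0);	/* underflow is a refcount bug */
	}

	/* The *_restore variants run on abort/error paths, where the undo of
	 * a previously granted reference is expected to succeed; failure
	 * there is itself a bug worth warning about.
	 */
	static inline void nft_use_inc_restore(u32 *use)
	{
		WARN_ON_ONCE(!nft_use_inc(use));
	}
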
@@ -437,7 +444,7 @@ static int nft_delchain(struct nft_ctx *ctx)
        if (IS_ERR(trans))
                return PTR_ERR(trans);
 
-       ctx->table->use--;
+       nft_use_dec(&ctx->table->use);
        nft_deactivate_next(ctx->net, ctx->chain);
 
        return 0;
@@ -476,7 +483,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
        /* You cannot delete the same rule twice */
        if (nft_is_active_next(ctx->net, rule)) {
                nft_deactivate_next(ctx->net, rule);
-               ctx->chain->use--;
+               nft_use_dec(&ctx->chain->use);
                return 0;
        }
        return -ENOENT;
@@ -580,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
        return __nft_trans_set_add(ctx, msg_type, set, NULL);
 }
 
-static void nft_setelem_data_deactivate(const struct net *net,
-                                       const struct nft_set *set,
-                                       struct nft_set_elem *elem);
-
 static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
                                  struct nft_set *set,
                                  const struct nft_set_iter *iter,
@@ -644,7 +647,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
                nft_map_deactivate(ctx, set);
 
        nft_deactivate_next(ctx->net, set);
-       ctx->table->use--;
+       nft_use_dec(&ctx->table->use);
 
        return err;
 }
@@ -676,7 +679,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
                return err;
 
        nft_deactivate_next(ctx->net, obj);
-       ctx->table->use--;
+       nft_use_dec(&ctx->table->use);
 
        return err;
 }
@@ -711,7 +714,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
                return err;
 
        nft_deactivate_next(ctx->net, flowtable);
-       ctx->table->use--;
+       nft_use_dec(&ctx->table->use);
 
        return err;
 }
@@ -1370,7 +1373,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
        if (table == NULL)
                goto err_kzalloc;
 
-       table->validate_state = NFT_VALIDATE_SKIP;
+       table->validate_state = nft_net->validate_state;
        table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
        if (table->name == NULL)
                goto err_strdup;
@@ -2396,9 +2399,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
        struct nft_chain *chain;
        int err;
 
-       if (table->use == UINT_MAX)
-               return -EOVERFLOW;
-
        if (nla[NFTA_CHAIN_HOOK]) {
                struct nft_stats __percpu *stats = NULL;
                struct nft_chain_hook hook = {};
@@ -2494,6 +2494,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
        if (err < 0)
                goto err_destroy_chain;
 
+       if (!nft_use_inc(&table->use)) {
+               err = -EMFILE;
+               goto err_use;
+       }
+
        trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
        if (IS_ERR(trans)) {
                err = PTR_ERR(trans);
@@ -2510,10 +2515,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                goto err_unregister_hook;
        }
 
-       table->use++;
-
        return 0;
+
 err_unregister_hook:
+       nft_use_dec_restore(&table->use);
+err_use:
        nf_tables_unregister_hook(net, table, chain);
 err_destroy_chain:
        nf_tables_chain_destroy(ctx);
@@ -2694,7 +2700,7 @@ err_hooks:
 
 static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
                                               const struct nft_table *table,
-                                              const struct nlattr *nla)
+                                              const struct nlattr *nla, u8 genmask)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
        u32 id = ntohl(nla_get_be32(nla));
@@ -2705,7 +2711,8 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
 
                if (trans->msg_type == NFT_MSG_NEWCHAIN &&
                    chain->table == table &&
-                   id == nft_trans_chain_id(trans))
+                   id == nft_trans_chain_id(trans) &&
+                   nft_active_genmask(chain, genmask))
                        return chain;
        }
        return ERR_PTR(-ENOENT);
@@ -3679,8 +3686,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
                        if (err < 0)
                                return err;
                }
-
-               cond_resched();
        }
 
        return 0;
@@ -3704,6 +3709,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
                err = nft_chain_validate(&ctx, chain);
                if (err < 0)
                        return err;
+
+               cond_resched();
        }
 
        return 0;
@@ -3805,11 +3812,10 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
                        NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
                        return PTR_ERR(chain);
                }
-               if (nft_chain_is_bound(chain))
-                       return -EOPNOTSUPP;
 
        } else if (nla[NFTA_RULE_CHAIN_ID]) {
-               chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
+               chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
+                                             genmask);
                if (IS_ERR(chain)) {
                        NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
                        return PTR_ERR(chain);
@@ -3818,6 +3824,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
                return -EINVAL;
        }
 
+       if (nft_chain_is_bound(chain))
+               return -EOPNOTSUPP;
+
        if (nla[NFTA_RULE_HANDLE]) {
                handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
                rule = __nft_rule_lookup(chain, handle);
@@ -3840,9 +3849,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
                        return -EINVAL;
                handle = nf_tables_alloc_handle(table);
 
-               if (chain->use == UINT_MAX)
-                       return -EOVERFLOW;
-
                if (nla[NFTA_RULE_POSITION]) {
                        pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
                        old_rule = __nft_rule_lookup(chain, pos_handle);
@@ -3936,6 +3942,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
                }
        }
 
+       if (!nft_use_inc(&chain->use)) {
+               err = -EMFILE;
+               goto err_release_rule;
+       }
+
        if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
                err = nft_delrule(&ctx, old_rule);
                if (err < 0)
@@ -3967,7 +3978,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
                }
        }
        kvfree(expr_info);
-       chain->use++;
 
        if (flow)
                nft_trans_flow_rule(trans) = flow;
@@ -3978,6 +3988,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
        return 0;
 
 err_destroy_flow_rule:
+       nft_use_dec_restore(&chain->use);
        if (flow)
                nft_flow_rule_destroy(flow);
 err_release_rule:
@@ -4078,6 +4089,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
                list_for_each_entry(chain, &table->chains, list) {
                        if (!nft_is_active_next(net, chain))
                                continue;
+                       if (nft_chain_is_bound(chain))
+                               continue;
 
                        ctx.chain = chain;
                        err = nft_delrule_by_chain(&ctx);
@@ -5014,9 +5027,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
        alloc_size = sizeof(*set) + size + udlen;
        if (alloc_size < size || alloc_size > INT_MAX)
                return -ENOMEM;
+
+       if (!nft_use_inc(&table->use))
+               return -EMFILE;
+
        set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
-       if (!set)
-               return -ENOMEM;
+       if (!set) {
+               err = -ENOMEM;
+               goto err_alloc;
+       }
 
        name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
        if (!name) {
@@ -5037,6 +5056,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
        INIT_LIST_HEAD(&set->bindings);
        INIT_LIST_HEAD(&set->catchall_list);
+       refcount_set(&set->refs, 1);
        set->table = table;
        write_pnet(&set->net, net);
        set->ops = ops;
@@ -5074,7 +5094,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
                goto err_set_expr_alloc;
 
        list_add_tail_rcu(&set->list, &table->sets);
-       table->use++;
+
        return 0;
 
 err_set_expr_alloc:
@@ -5086,6 +5106,9 @@ err_set_init:
        kfree(set->name);
 err_set_name:
        kvfree(set);
+err_alloc:
+       nft_use_dec_restore(&table->use);
+
        return err;
 }
 
@@ -5101,6 +5124,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
        }
 }
 
+static void nft_set_put(struct nft_set *set)
+{
+       if (refcount_dec_and_test(&set->refs)) {
+               kfree(set->name);
+               kvfree(set);
+       }
+}
+
 static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 {
        int i;
@@ -5113,8 +5144,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
 
        set->ops->destroy(ctx, set);
        nft_set_catchall_destroy(ctx, set);
-       kfree(set->name);
-       kvfree(set);
+       nft_set_put(set);
 }
 
 static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5224,9 +5254,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
        struct nft_set_binding *i;
        struct nft_set_iter iter;
 
-       if (set->use == UINT_MAX)
-               return -EOVERFLOW;
-
        if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
                return -EBUSY;
 
@@ -5254,10 +5281,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
                        return iter.err;
        }
 bind:
+       if (!nft_use_inc(&set->use))
+               return -EMFILE;
+
        binding->chain = ctx->chain;
        list_add_tail_rcu(&binding->list, &set->bindings);
        nft_set_trans_bind(ctx, set);
-       set->use++;
 
        return 0;
 }
@@ -5331,7 +5360,7 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
                nft_clear(ctx->net, set);
        }
 
-       set->use++;
+       nft_use_inc_restore(&set->use);
 }
 EXPORT_SYMBOL_GPL(nf_tables_activate_set);
 
@@ -5347,7 +5376,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
                else
                        list_del_rcu(&binding->list);
 
-               set->use--;
+               nft_use_dec(&set->use);
                break;
        case NFT_TRANS_PREPARE:
                if (nft_set_is_anonymous(set)) {
@@ -5356,7 +5385,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
 
                        nft_deactivate_next(ctx->net, set);
                }
-               set->use--;
+               nft_use_dec(&set->use);
                return;
        case NFT_TRANS_ABORT:
        case NFT_TRANS_RELEASE:
@@ -5364,7 +5393,7 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
                    set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
                        nft_map_deactivate(ctx, set);
 
-               set->use--;
+               nft_use_dec(&set->use);
                fallthrough;
        default:
                nf_tables_unbind_set(ctx, set, binding,
@@ -5582,8 +5611,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
                                  const struct nft_set_iter *iter,
                                  struct nft_set_elem *elem)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
        struct nft_set_dump_args *args;
 
+       if (nft_set_elem_expired(ext))
+               return 0;
+
        args = container_of(iter, struct nft_set_dump_args, iter);
        return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
 }
@@ -6155,7 +6188,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
                nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
 
        if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
-               (*nft_set_ext_obj(ext))->use--;
+               nft_use_dec(&(*nft_set_ext_obj(ext))->use);
        kfree(elem);
 }
 EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -6254,7 +6287,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
                if (nft_set_elem_active(ext, genmask) &&
-                   !nft_set_elem_expired(ext))
+                   !nft_set_elem_expired(ext) &&
+                   !nft_set_elem_is_dead(ext))
                        return ext;
        }
 
@@ -6262,29 +6296,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
 }
 EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
 
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
-       struct nft_set_elem_catchall *catchall, *next;
-       struct nft_set_ext *ext;
-       void *elem = NULL;
-
-       list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
-               ext = nft_set_elem_ext(set, catchall->elem);
-
-               if (!nft_set_elem_expired(ext) ||
-                   nft_set_elem_mark_busy(ext))
-                       continue;
-
-               elem = catchall->elem;
-               list_del_rcu(&catchall->list);
-               kfree_rcu(catchall, rcu);
-               break;
-       }
-
-       return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
 static int nft_setelem_catchall_insert(const struct net *net,
                                       struct nft_set *set,
                                       const struct nft_set_elem *elem,
@@ -6346,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
 
        if (nft_setelem_is_catchall(set, elem)) {
                nft_set_elem_change_active(net, set, ext);
-               nft_set_elem_clear_busy(ext);
        } else {
                set->ops->activate(net, set, elem);
        }
@@ -6361,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
 
        list_for_each_entry(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_is_active(net, ext) ||
-                   nft_set_elem_mark_busy(ext))
+               if (!nft_is_active(net, ext))
                        continue;
 
                kfree(elem->priv);
@@ -6657,8 +6666,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                                     set->objtype, genmask);
                if (IS_ERR(obj)) {
                        err = PTR_ERR(obj);
+                       obj = NULL;
                        goto err_parse_key_end;
                }
+
+               if (!nft_use_inc(&obj->use)) {
+                       err = -EMFILE;
+                       obj = NULL;
+                       goto err_parse_key_end;
+               }
+
                err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
                if (err < 0)
                        goto err_parse_key_end;
@@ -6727,10 +6744,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
        if (flags)
                *nft_set_ext_flags(ext) = flags;
 
-       if (obj) {
+       if (obj)
                *nft_set_ext_obj(ext) = obj;
-               obj->use++;
-       }
+
        if (ulen > 0) {
                if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
                        err = -EINVAL;
@@ -6750,7 +6766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                goto err_elem_free;
        }
 
-       ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+       ext->genmask = nft_genmask_cur(ctx->net);
 
        err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
        if (err) {
@@ -6798,12 +6814,13 @@ err_element_clash:
        kfree(trans);
 err_elem_free:
        nf_tables_set_elem_destroy(ctx, set, elem.priv);
-       if (obj)
-               obj->use--;
 err_parse_data:
        if (nla[NFTA_SET_ELEM_DATA] != NULL)
                nft_data_release(&elem.data.val, desc.type);
 err_parse_key_end:
+       if (obj)
+               nft_use_dec_restore(&obj->use);
+
        nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
 err_parse_key:
        nft_data_release(&elem.key.val, NFT_DATA_VALUE);
@@ -6883,7 +6900,7 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
                case NFT_JUMP:
                case NFT_GOTO:
                        chain = data->verdict.chain;
-                       chain->use++;
+                       nft_use_inc_restore(&chain->use);
                        break;
                }
        }
@@ -6898,19 +6915,19 @@ static void nft_setelem_data_activate(const struct net *net,
        if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
                nft_data_hold(nft_set_ext_data(ext), set->dtype);
        if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
-               (*nft_set_ext_obj(ext))->use++;
+               nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
 }
 
-static void nft_setelem_data_deactivate(const struct net *net,
-                                       const struct nft_set *set,
-                                       struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+                                const struct nft_set *set,
+                                struct nft_set_elem *elem)
 {
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
 
        if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
                nft_data_release(nft_set_ext_data(ext), set->dtype);
        if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
-               (*nft_set_ext_obj(ext))->use--;
+               nft_use_dec(&(*nft_set_ext_obj(ext))->use);
 }
 
 static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
@@ -7067,14 +7084,14 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
 
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_set_elem_active(ext, genmask) ||
-                   nft_set_elem_mark_busy(ext))
+               if (!nft_set_elem_active(ext, genmask))
                        continue;
 
                elem.priv = catchall->elem;
                ret = __nft_set_catchall_flush(ctx, set, &elem);
                if (ret < 0)
                        break;
+               nft_set_elem_change_active(ctx->net, set, ext);
        }
 
        return ret;
@@ -7142,29 +7159,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
        return err;
 }
 
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
-       struct nft_set_gc_batch *gcb;
-       unsigned int i;
-
-       gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
-       for (i = 0; i < gcb->head.cnt; i++)
-               nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
-       kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
-                                               gfp_t gfp)
-{
-       struct nft_set_gc_batch *gcb;
-
-       gcb = kzalloc(sizeof(*gcb), gfp);
-       if (gcb == NULL)
-               return gcb;
-       gcb->head.set = set;
-       return gcb;
-}
-
 /*
  * Stateful objects
  */
@@ -7453,9 +7447,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
 
        nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
+       if (!nft_use_inc(&table->use))
+               return -EMFILE;
+
        type = nft_obj_type_get(net, objtype);
-       if (IS_ERR(type))
-               return PTR_ERR(type);
+       if (IS_ERR(type)) {
+               err = PTR_ERR(type);
+               goto err_type;
+       }
 
        obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
        if (IS_ERR(obj)) {
@@ -7489,7 +7488,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
                goto err_obj_ht;
 
        list_add_tail_rcu(&obj->list, &table->objects);
-       table->use++;
+
        return 0;
 err_obj_ht:
        /* queued in transaction log */
@@ -7505,6 +7504,9 @@ err_strdup:
        kfree(obj);
 err_init:
        module_put(type->owner);
+err_type:
+       nft_use_dec_restore(&table->use);
+
        return err;
 }
 
@@ -7906,7 +7908,7 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
        case NFT_TRANS_PREPARE:
        case NFT_TRANS_ABORT:
        case NFT_TRANS_RELEASE:
-               flowtable->use--;
+               nft_use_dec(&flowtable->use);
                fallthrough;
        default:
                return;
@@ -8260,9 +8262,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
 
        nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
 
+       if (!nft_use_inc(&table->use))
+               return -EMFILE;
+
        flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
-       if (!flowtable)
-               return -ENOMEM;
+       if (!flowtable) {
+               err = -ENOMEM;
+               goto flowtable_alloc;
+       }
 
        flowtable->table = table;
        flowtable->handle = nf_tables_alloc_handle(table);
@@ -8317,7 +8324,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
                goto err5;
 
        list_add_tail_rcu(&flowtable->list, &table->flowtables);
-       table->use++;
 
        return 0;
 err5:
@@ -8334,6 +8340,9 @@ err2:
        kfree(flowtable->name);
 err1:
        kfree(flowtable);
+flowtable_alloc:
+       nft_use_dec_restore(&table->use);
+
        return err;
 }
 
@@ -9042,9 +9051,8 @@ static int nf_tables_validate(struct net *net)
                                return -EAGAIN;
 
                        nft_validate_state_update(table, NFT_VALIDATE_SKIP);
+                       break;
                }
-
-               break;
        }
 
        return 0;
@@ -9371,6 +9379,212 @@ void nft_chain_del(struct nft_chain *chain)
        list_del_rcu(&chain->list);
 }
 
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+                                       struct nft_trans_gc *trans)
+{
+       void **priv = trans->priv;
+       unsigned int i;
+
+       for (i = 0; i < trans->count; i++) {
+               struct nft_set_elem elem = {
+                       .priv = priv[i],
+               };
+
+               nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+               nft_setelem_remove(ctx->net, trans->set, &elem);
+       }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+       nft_set_put(trans->set);
+       put_net(trans->net);
+       kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+       struct nft_set_elem elem = {};
+       struct nft_trans_gc *trans;
+       struct nft_ctx ctx = {};
+       unsigned int i;
+
+       trans = container_of(rcu, struct nft_trans_gc, rcu);
+       ctx.net = read_pnet(&trans->set->net);
+
+       for (i = 0; i < trans->count; i++) {
+               elem.priv = trans->priv[i];
+               if (!nft_setelem_is_catchall(trans->set, &elem))
+                       atomic_dec(&trans->set->nelems);
+
+               nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+       }
+
+       nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+       struct nftables_pernet *nft_net;
+       struct nft_ctx ctx = {};
+
+       nft_net = nft_pernet(trans->net);
+
+       mutex_lock(&nft_net->commit_mutex);
+
+       /* Check for a race with the transaction machinery, otherwise this
+        * batch may refer to stale objects that are no longer there. Also
+        * skip the batch if the set has been destroyed by a control plane
+        * transaction, i.e. the gc worker lost that race.
+        */
+       if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+               mutex_unlock(&nft_net->commit_mutex);
+               return false;
+       }
+
+       ctx.net = trans->net;
+       ctx.table = trans->set->table;
+
+       nft_trans_gc_setelem_remove(&ctx, trans);
+       mutex_unlock(&nft_net->commit_mutex);
+
+       return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+       struct nft_trans_gc *trans, *next;
+       LIST_HEAD(trans_gc_list);
+
+       spin_lock(&nf_tables_gc_list_lock);
+       list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+       spin_unlock(&nf_tables_gc_list_lock);
+
+       list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+               list_del(&trans->list);
+               if (!nft_trans_gc_work_done(trans)) {
+                       nft_trans_gc_destroy(trans);
+                       continue;
+               }
+               call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+       }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+                                       unsigned int gc_seq, gfp_t gfp)
+{
+       struct net *net = read_pnet(&set->net);
+       struct nft_trans_gc *trans;
+
+       trans = kzalloc(sizeof(*trans), gfp);
+       if (!trans)
+               return NULL;
+
+       trans->net = maybe_get_net(net);
+       if (!trans->net) {
+               kfree(trans);
+               return NULL;
+       }
+
+       refcount_inc(&set->refs);
+       trans->set = set;
+       trans->seq = gc_seq;
+
+       return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+       trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+       spin_lock(&nf_tables_gc_list_lock);
+       list_add_tail(&trans->list, &nf_tables_gc_list);
+       spin_unlock(&nf_tables_gc_list_lock);
+
+       schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+       return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+                                             unsigned int gc_seq, gfp_t gfp)
+{
+       if (nft_trans_gc_space(gc))
+               return gc;
+
+       nft_trans_gc_queue_work(gc);
+
+       return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+       if (trans->count == 0) {
+               nft_trans_gc_destroy(trans);
+               return;
+       }
+
+       nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+       if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+               return NULL;
+
+       if (nft_trans_gc_space(gc))
+               return gc;
+
+       call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+       return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+       WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+       if (trans->count == 0) {
+               nft_trans_gc_destroy(trans);
+               return;
+       }
+
+       call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+                                          unsigned int gc_seq)
+{
+       struct nft_set_elem_catchall *catchall;
+       const struct nft_set *set = gc->set;
+       struct nft_set_ext *ext;
+
+       list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+               ext = nft_set_elem_ext(set, catchall->elem);
+
+               if (!nft_set_elem_expired(ext))
+                       continue;
+               if (nft_set_elem_is_dead(ext))
+                       goto dead_elem;
+
+               nft_set_elem_dead(ext);
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       return NULL;
+
+               nft_trans_gc_elem_add(gc, catchall->elem);
+       }
+
+       return gc;
+}
+
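
Taken together, the functions above split garbage collection into a collect phase (run by a set backend) and a destroy phase (the worker), with gc_seq deciding whether a batch is still trustworthy by the time the worker sees it. A backend's async GC loop ends up shaped roughly like this; for_each_set_elem stands in for the backend-specific walk (rhashtable, rbtree, ...) and error handling is trimmed:

	struct nftables_pernet *nft_net = nft_pernet(net);
	struct nft_trans_gc *gc;
	unsigned int gc_seq;

	gc_seq = READ_ONCE(nft_net->gc_seq);
	gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
	if (!gc)
		return;				/* retry on the next GC run */

	for_each_set_elem(set, ext, priv) {	/* backend-specific walk */
		if (!nft_set_elem_expired(ext))
			continue;

		nft_set_elem_dead(ext);		/* hide from lookups right away */

		/* If the batch is full, queue it and start a fresh one. */
		gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
		if (!gc)
			return;
		nft_trans_gc_elem_add(gc, priv);
	}

	gc = nft_trans_gc_catchall(gc, gc_seq);
	if (gc)
		nft_trans_gc_queue_async_done(gc);	/* queue the tail batch */

If a commit ran in between, nft_trans_gc_work_done() sees a mismatched gc_seq and throws the batch away; the elements were only marked dead, so the next GC pass simply picks them up again.
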
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9529,15 +9743,31 @@ static void nft_set_commit_update(struct list_head *set_update_list)
        }
 }
 
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+       unsigned int gc_seq;
+
+       /* Bump the gc counter; an odd value is the busy mark. */
+       gc_seq = READ_ONCE(nft_net->gc_seq);
+       WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
+       return gc_seq;
+}
+
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+       WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+}
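
The parity is the whole trick: the counter starts even, nft_gc_seq_begin() makes it odd for the duration of a commit (or abort), and nft_gc_seq_end() makes it even again, so the worker's "gc_seq != trans->seq" comparison, made under commit_mutex, rejects every batch sampled before or during the transaction. A worked timeline:

	/* gc_seq = 4 (even: idle)
	 *   backend samples gc_seq = 4 and starts filling a batch
	 * nft_gc_seq_begin()  ->  gc_seq = 5 (odd: commit in progress)
	 *   worker runs: 5 != 4  ->  batch discarded, elements untouched
	 * nft_gc_seq_end()    ->  gc_seq = 6 (even: idle again)
	 *   only batches tagged with 6 now pass the worker's check
	 */
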
+
 static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
        struct nft_trans *trans, *next;
+       unsigned int base_seq, gc_seq;
        LIST_HEAD(set_update_list);
        struct nft_trans_elem *te;
        struct nft_chain *chain;
        struct nft_table *table;
-       unsigned int base_seq;
        LIST_HEAD(adl);
        int err;
 
@@ -9568,8 +9798,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
        }
 
        /* 0. Validate ruleset, otherwise roll back for error reporting. */
-       if (nf_tables_validate(net) < 0)
+       if (nf_tables_validate(net) < 0) {
+               nft_net->validate_state = NFT_VALIDATE_DO;
                return -EAGAIN;
+       }
 
        err = nft_flow_rule_offload_commit(net);
        if (err < 0)
@@ -9614,6 +9846,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 
        WRITE_ONCE(nft_net->base_seq, base_seq);
 
+       gc_seq = nft_gc_seq_begin(nft_net);
+
        /* step 3. Start new generation, rules_gen_X now in use. */
        net->nft.gencursor = nft_gencursor_next(net);
 
@@ -9713,7 +9947,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                                 */
                                if (nft_set_is_anonymous(nft_trans_set(trans)) &&
                                    !list_empty(&nft_trans_set(trans)->bindings))
-                                       trans->ctx.table->use--;
+                                       nft_use_dec(&trans->ctx.table->use);
                        }
                        nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
                                             NFT_MSG_NEWSET, GFP_KERNEL);
@@ -9721,6 +9955,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        break;
                case NFT_MSG_DELSET:
                case NFT_MSG_DESTROYSET:
+                       nft_trans_set(trans)->dead = 1;
                        list_del_rcu(&nft_trans_set(trans)->list);
                        nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
                                             trans->msg_type, GFP_KERNEL);
@@ -9823,6 +10058,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
        nft_commit_notify(net, NETLINK_CB(skb).portid);
        nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
        nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+       nft_gc_seq_end(nft_net, gc_seq);
+       nft_net->validate_state = NFT_VALIDATE_SKIP;
        nf_tables_commit_release(net);
 
        return 0;
@@ -9943,7 +10181,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                        nft_trans_destroy(trans);
                                        break;
                                }
-                               trans->ctx.table->use--;
+                               nft_use_dec_restore(&trans->ctx.table->use);
                                nft_chain_del(trans->ctx.chain);
                                nf_tables_unregister_hook(trans->ctx.net,
                                                          trans->ctx.table,
@@ -9956,7 +10194,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                list_splice(&nft_trans_chain_hooks(trans),
                                            &nft_trans_basechain(trans)->hook_list);
                        } else {
-                               trans->ctx.table->use++;
+                               nft_use_inc_restore(&trans->ctx.table->use);
                                nft_clear(trans->ctx.net, trans->ctx.chain);
                        }
                        nft_trans_destroy(trans);
@@ -9966,7 +10204,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                nft_trans_destroy(trans);
                                break;
                        }
-                       trans->ctx.chain->use--;
+                       nft_use_dec_restore(&trans->ctx.chain->use);
                        list_del_rcu(&nft_trans_rule(trans)->list);
                        nft_rule_expr_deactivate(&trans->ctx,
                                                 nft_trans_rule(trans),
@@ -9976,7 +10214,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                        break;
                case NFT_MSG_DELRULE:
                case NFT_MSG_DESTROYRULE:
-                       trans->ctx.chain->use++;
+                       nft_use_inc_restore(&trans->ctx.chain->use);
                        nft_clear(trans->ctx.net, nft_trans_rule(trans));
                        nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
                        if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
@@ -9989,7 +10227,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                nft_trans_destroy(trans);
                                break;
                        }
-                       trans->ctx.table->use--;
+                       nft_use_dec_restore(&trans->ctx.table->use);
                        if (nft_trans_set_bound(trans)) {
                                nft_trans_destroy(trans);
                                break;
@@ -9998,7 +10236,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                        break;
                case NFT_MSG_DELSET:
                case NFT_MSG_DESTROYSET:
-                       trans->ctx.table->use++;
+                       nft_use_inc_restore(&trans->ctx.table->use);
                        nft_clear(trans->ctx.net, nft_trans_set(trans));
                        if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
                                nft_map_activate(&trans->ctx, nft_trans_set(trans));
@@ -10042,13 +10280,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
                                nft_trans_destroy(trans);
                        } else {
-                               trans->ctx.table->use--;
+                               nft_use_dec_restore(&trans->ctx.table->use);
                                nft_obj_del(nft_trans_obj(trans));
                        }
                        break;
                case NFT_MSG_DELOBJ:
                case NFT_MSG_DESTROYOBJ:
-                       trans->ctx.table->use++;
+                       nft_use_inc_restore(&trans->ctx.table->use);
                        nft_clear(trans->ctx.net, nft_trans_obj(trans));
                        nft_trans_destroy(trans);
                        break;
@@ -10057,7 +10295,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                nft_unregister_flowtable_net_hooks(net,
                                                &nft_trans_flowtable_hooks(trans));
                        } else {
-                               trans->ctx.table->use--;
+                               nft_use_dec_restore(&trans->ctx.table->use);
                                list_del_rcu(&nft_trans_flowtable(trans)->list);
                                nft_unregister_flowtable_net_hooks(net,
                                                &nft_trans_flowtable(trans)->hook_list);
@@ -10069,7 +10307,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                                list_splice(&nft_trans_flowtable_hooks(trans),
                                            &nft_trans_flowtable(trans)->hook_list);
                        } else {
-                               trans->ctx.table->use++;
+                               nft_use_inc_restore(&trans->ctx.table->use);
                                nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
                        }
                        nft_trans_destroy(trans);
@@ -10099,8 +10337,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
                           enum nfnl_abort_action action)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
-       int ret = __nf_tables_abort(net, action);
+       unsigned int gc_seq;
+       int ret;
 
+       gc_seq = nft_gc_seq_begin(nft_net);
+       ret = __nf_tables_abort(net, action);
+       nft_gc_seq_end(nft_net, gc_seq);
        mutex_unlock(&nft_net->commit_mutex);
 
        return ret;
@@ -10477,6 +10719,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
 
        if (!tb[NFTA_VERDICT_CODE])
                return -EINVAL;
+
+       /* zero padding hole for memcmp */
+       memset(data, 0, sizeof(*data));
        data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
 
        switch (data->verdict.code) {
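
The memset above exists because, as the added comment says, struct nft_data ends up compared with memcmp() elsewhere, and C leaves the contents of padding holes unspecified: two values with identical members can still differ in their padding bytes. The gotcha in miniature, with a hypothetical struct:

	struct v {
		u8	code;		/* 1 byte */
		/* 3 bytes of padding the compiler may leave as garbage */
		u32	chain_id;
	};

	static bool same(const struct v *a, const struct v *b)
	{
		/* Only correct if every instance was zeroed (memset) before
		 * its members were assigned; otherwise the padding bytes
		 * compare as random noise.
		 */
		return memcmp(a, b, sizeof(*a)) == 0;
	}
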
@@ -10502,7 +10747,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
                                                 genmask);
                } else if (tb[NFTA_VERDICT_CHAIN_ID]) {
                        chain = nft_chain_lookup_byid(ctx->net, ctx->table,
-                                                     tb[NFTA_VERDICT_CHAIN_ID]);
+                                                     tb[NFTA_VERDICT_CHAIN_ID],
+                                                     genmask);
                        if (IS_ERR(chain))
                                return PTR_ERR(chain);
                } else {
@@ -10518,8 +10764,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
                if (desc->flags & NFT_DATA_DESC_SETELEM &&
                    chain->flags & NFT_CHAIN_BINDING)
                        return -EINVAL;
+               if (!nft_use_inc(&chain->use))
+                       return -EMFILE;
 
-               chain->use++;
                data->verdict.chain = chain;
                break;
        }
@@ -10537,7 +10784,7 @@ static void nft_verdict_uninit(const struct nft_data *data)
        case NFT_JUMP:
        case NFT_GOTO:
                chain = data->verdict.chain;
-               chain->use--;
+               nft_use_dec(&chain->use);
                break;
        }
 }
@@ -10706,11 +10953,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
        nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
        list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
                list_del(&rule->list);
-               ctx->chain->use--;
+               nft_use_dec(&ctx->chain->use);
                nf_tables_rule_release(ctx, rule);
        }
        nft_chain_del(ctx->chain);
-       ctx->table->use--;
+       nft_use_dec(&ctx->table->use);
        nf_tables_chain_destroy(ctx);
 
        return 0;
@@ -10757,21 +11004,24 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
        ctx.family = table->family;
        ctx.table = table;
        list_for_each_entry(chain, &table->chains, list) {
+               if (nft_chain_is_bound(chain))
+                       continue;
+
                ctx.chain = chain;
                list_for_each_entry_safe(rule, nr, &chain->rules, list) {
                        list_del(&rule->list);
-                       chain->use--;
+                       nft_use_dec(&chain->use);
                        nf_tables_rule_release(&ctx, rule);
                }
        }
        list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
                list_del(&flowtable->list);
-               table->use--;
+               nft_use_dec(&table->use);
                nf_tables_flowtable_destroy(flowtable);
        }
        list_for_each_entry_safe(set, ns, &table->sets, list) {
                list_del(&set->list);
-               table->use--;
+               nft_use_dec(&table->use);
                if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
                        nft_map_deactivate(&ctx, set);
 
@@ -10779,13 +11029,13 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
        }
        list_for_each_entry_safe(obj, ne, &table->objects, list) {
                nft_obj_del(obj);
-               table->use--;
+               nft_use_dec(&table->use);
                nft_obj_destroy(&ctx, obj);
        }
        list_for_each_entry_safe(chain, nc, &table->chains, list) {
                ctx.chain = chain;
                nft_chain_del(chain);
-               table->use--;
+               nft_use_dec(&table->use);
                nf_tables_chain_destroy(&ctx);
        }
        nf_tables_table_destroy(&ctx);
@@ -10815,6 +11065,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
        struct net *net = n->net;
        unsigned int deleted;
        bool restart = false;
+       unsigned int gc_seq;
 
        if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
                return NOTIFY_DONE;
@@ -10822,8 +11073,11 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
        nft_net = nft_pernet(net);
        deleted = 0;
        mutex_lock(&nft_net->commit_mutex);
+
+       gc_seq = nft_gc_seq_begin(nft_net);
+
        if (!list_empty(&nf_tables_destroy_list))
-               rcu_barrier();
+               nf_tables_trans_destroy_flush_work();
 again:
        list_for_each_entry(table, &nft_net->tables, list) {
                if (nft_table_has_owner(table) &&
@@ -10844,6 +11098,8 @@ again:
                if (restart)
                        goto again;
        }
+       nft_gc_seq_end(nft_net, gc_seq);
+
        mutex_unlock(&nft_net->commit_mutex);
 
        return NOTIFY_DONE;
@@ -10864,6 +11120,8 @@ static int __net_init nf_tables_init_net(struct net *net)
        INIT_LIST_HEAD(&nft_net->notify_list);
        mutex_init(&nft_net->commit_mutex);
        nft_net->base_seq = 1;
+       nft_net->gc_seq = 0;
+       nft_net->validate_state = NFT_VALIDATE_SKIP;
 
        return 0;
 }
@@ -10880,22 +11138,36 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
 static void __net_exit nf_tables_exit_net(struct net *net)
 {
        struct nftables_pernet *nft_net = nft_pernet(net);
+       unsigned int gc_seq;
 
        mutex_lock(&nft_net->commit_mutex);
+
+       gc_seq = nft_gc_seq_begin(nft_net);
+
        if (!list_empty(&nft_net->commit_list) ||
            !list_empty(&nft_net->module_list))
                __nf_tables_abort(net, NFNL_ABORT_NONE);
+
        __nft_release_tables(net);
+
+       nft_gc_seq_end(nft_net, gc_seq);
+
        mutex_unlock(&nft_net->commit_mutex);
        WARN_ON_ONCE(!list_empty(&nft_net->tables));
        WARN_ON_ONCE(!list_empty(&nft_net->module_list));
        WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
 }
 
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+       flush_work(&trans_gc_work);
+}
+
 static struct pernet_operations nf_tables_net_ops = {
        .init           = nf_tables_init_net,
        .pre_exit       = nf_tables_pre_exit_net,
        .exit           = nf_tables_exit_net,
+       .exit_batch     = nf_tables_exit_batch,
        .id             = &nf_tables_net_id,
        .size           = sizeof(struct nftables_pernet),
 };
@@ -10967,6 +11239,7 @@ static void __exit nf_tables_module_exit(void)
        nft_chain_filter_fini();
        nft_chain_route_fini();
        unregister_pernet_subsys(&nf_tables_net_ops);
+       cancel_work_sync(&trans_gc_work);
        cancel_work_sync(&trans_destroy_work);
        rcu_barrier();
        rhltable_destroy(&nft_objname_ht);
index 9a85e79..e596d1a 100644
@@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
        const struct nft_byteorder *priv = nft_expr_priv(expr);
        u32 *src = &regs->data[priv->sreg];
        u32 *dst = &regs->data[priv->dreg];
-       union { u32 u32; u16 u16; } *s, *d;
+       u16 *s16, *d16;
        unsigned int i;
 
-       s = (void *)src;
-       d = (void *)dst;
+       s16 = (void *)src;
+       d16 = (void *)dst;
 
        switch (priv->size) {
        case 8: {
@@ -62,11 +62,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 4; i++)
-                               d[i].u32 = ntohl((__force __be32)s[i].u32);
+                               dst[i] = ntohl((__force __be32)src[i]);
                        break;
                case NFT_BYTEORDER_HTON:
                        for (i = 0; i < priv->len / 4; i++)
-                               d[i].u32 = (__force __u32)htonl(s[i].u32);
+                               dst[i] = (__force __u32)htonl(src[i]);
                        break;
                }
                break;
@@ -74,11 +74,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
                switch (priv->op) {
                case NFT_BYTEORDER_NTOH:
                        for (i = 0; i < priv->len / 2; i++)
-                               d[i].u16 = ntohs((__force __be16)s[i].u16);
+                               d16[i] = ntohs((__force __be16)s16[i]);
                        break;
                case NFT_BYTEORDER_HTON:
                        for (i = 0; i < priv->len / 2; i++)
-                               d[i].u16 = (__force __u16)htons(s[i].u16);
+                               d16[i] = (__force __u16)htons(s16[i]);
                        break;
                }
                break;
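
The union was the bug here, if the arithmetic is read straight off the old code: sizeof(union { u32 u32; u16 u16; }) is 4, so indexing it strides 4 bytes regardless of which member is touched. For the 16-bit case that both skipped every other word and, over priv->len / 2 iterations, spanned twice the intended register area. Plain u16 pointers restore the 2-byte stride, and the 32-bit case now just indexes src/dst directly:

	union { u32 u32; u16 u16; } *s;	/* sizeof(*s) == 4 */
	u16 *s16;

	/* &s[1].u16 is (char *)s + 4: bytes 2..3 are never visited.
	 * &s16[1]   is (char *)s16 + 2: the next 16-bit word, as intended.
	 *
	 * With n = priv->len / 2 iterations, the union walk covers
	 * 4 * n == 2 * priv->len bytes, twice the area it should touch,
	 * while converting only half of the words inside it.
	 */
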
index 4fb34d7..5c5cc01 100644
@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
        if (IS_ERR(set))
                return PTR_ERR(set);
 
+       if (set->flags & NFT_SET_OBJECT)
+               return -EOPNOTSUPP;
+
        if (set->ops->update == NULL)
                return -EOPNOTSUPP;
 
index 5ef9146..ab3362c 100644
@@ -408,8 +408,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
        if (IS_ERR(flowtable))
                return PTR_ERR(flowtable);
 
+       if (!nft_use_inc(&flowtable->use))
+               return -EMFILE;
+
        priv->flowtable = flowtable;
-       flowtable->use++;
 
        return nf_ct_netns_get(ctx->net, ctx->family);
 }
@@ -428,7 +430,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
 {
        struct nft_flow_offload *priv = nft_expr_priv(expr);
 
-       priv->flowtable->use++;
+       nft_use_inc_restore(&priv->flowtable->use);
 }
 
 static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
index 3d76ebf..fccb3cf 100644
@@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
        return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
 }
 
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+                                          struct nft_chain *chain,
+                                          enum nft_trans_phase phase)
+{
+       struct nft_ctx chain_ctx;
+       struct nft_rule *rule;
+
+       chain_ctx = *ctx;
+       chain_ctx.chain = chain;
+
+       list_for_each_entry(rule, &chain->rules, list)
+               nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
 static void nft_immediate_deactivate(const struct nft_ctx *ctx,
                                     const struct nft_expr *expr,
                                     enum nft_trans_phase phase)
 {
        const struct nft_immediate_expr *priv = nft_expr_priv(expr);
        const struct nft_data *data = &priv->data;
-       struct nft_ctx chain_ctx;
        struct nft_chain *chain;
-       struct nft_rule *rule;
 
        if (priv->dreg == NFT_REG_VERDICT) {
                switch (data->verdict.code) {
@@ -143,23 +155,20 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
                        if (!nft_chain_binding(chain))
                                break;
 
-                       chain_ctx = *ctx;
-                       chain_ctx.chain = chain;
-
-                       list_for_each_entry(rule, &chain->rules, list)
-                               nft_rule_expr_deactivate(&chain_ctx, rule, phase);
-
                        switch (phase) {
                        case NFT_TRANS_PREPARE_ERROR:
                                nf_tables_unbind_chain(ctx, chain);
-                               fallthrough;
+                               nft_deactivate_next(ctx->net, chain);
+                               break;
                        case NFT_TRANS_PREPARE:
+                               nft_immediate_chain_deactivate(ctx, chain, phase);
                                nft_deactivate_next(ctx->net, chain);
                                break;
                        default:
+                               nft_immediate_chain_deactivate(ctx, chain, phase);
                                nft_chain_del(chain);
                                chain->bound = false;
-                               chain->table->use--;
+                               nft_use_dec(&chain->table->use);
                                break;
                        }
                        break;
@@ -198,7 +207,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
                 * let the transaction records release this chain and its rules.
                 */
                if (chain->bound) {
-                       chain->use--;
+                       nft_use_dec(&chain->use);
                        break;
                }
 
@@ -206,9 +215,9 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
                chain_ctx = *ctx;
                chain_ctx.chain = chain;
 
-               chain->use--;
+               nft_use_dec(&chain->use);
                list_for_each_entry_safe(rule, n, &chain->rules, list) {
-                       chain->use--;
+                       nft_use_dec(&chain->use);
                        list_del(&rule->list);
                        nf_tables_rule_destroy(&chain_ctx, rule);
                }
index a48dd5b..509011b 100644 (file)
@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
        if (IS_ERR(obj))
                return -ENOENT;
 
+       if (!nft_use_inc(&obj->use))
+               return -EMFILE;
+
        nft_objref_priv(expr) = obj;
-       obj->use++;
 
        return 0;
 }
@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
        if (phase == NFT_TRANS_COMMIT)
                return;
 
-       obj->use--;
+       nft_use_dec(&obj->use);
 }
 
 static void nft_objref_activate(const struct nft_ctx *ctx,
@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
 {
        struct nft_object *obj = nft_objref_priv(expr);
 
-       obj->use++;
+       nft_use_inc_restore(&obj->use);
 }
 
 static const struct nft_expr_ops nft_objref_ops = {
index 0b73cb0..5247636 100644 (file)
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
 
        if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
                return 1;
+       if (nft_set_elem_is_dead(&he->ext))
+               return 1;
        if (nft_set_elem_expired(&he->ext))
                return 1;
        if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
        struct nft_rhash_elem *he = elem->priv;
 
        nft_set_elem_change_active(net, set, &he->ext);
-       nft_set_elem_clear_busy(&he->ext);
 }
 
 static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
 {
        struct nft_rhash_elem *he = priv;
 
-       if (!nft_set_elem_mark_busy(&he->ext) ||
-           !nft_is_active(net, &he->ext)) {
-               nft_set_elem_change_active(net, set, &he->ext);
-               return true;
-       }
-       return false;
+       nft_set_elem_change_active(net, set, &he->ext);
+
+       return true;
 }
 
 static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
 
        rcu_read_lock();
        he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
-       if (he != NULL &&
-           !nft_rhash_flush(net, set, he))
-               he = NULL;
+       if (he)
+               nft_set_elem_change_active(net, set, &he->ext);
 
        rcu_read_unlock();
 
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
        if (he == NULL)
                return false;
 
-       return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+       nft_set_elem_dead(&he->ext);
+
+       return true;
 }
 
 static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (nft_set_elem_expired(&he->ext))
-                       goto cont;
                if (!nft_set_elem_active(&he->ext, iter->genmask))
                        goto cont;
 
@@ -314,25 +311,51 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
 
 static void nft_rhash_gc(struct work_struct *work)
 {
+       struct nftables_pernet *nft_net;
        struct nft_set *set;
        struct nft_rhash_elem *he;
        struct nft_rhash *priv;
-       struct nft_set_gc_batch *gcb = NULL;
        struct rhashtable_iter hti;
+       struct nft_trans_gc *gc;
+       struct net *net;
+       u32 gc_seq;
 
        priv = container_of(work, struct nft_rhash, gc_work.work);
        set  = nft_set_container_of(priv);
+       net  = read_pnet(&set->net);
+       nft_net = nft_pernet(net);
+       gc_seq = READ_ONCE(nft_net->gc_seq);
+
+       if (nft_set_gc_is_pending(set))
+               goto done;
+
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
 
        rhashtable_walk_enter(&priv->ht, &hti);
        rhashtable_walk_start(&hti);
 
        while ((he = rhashtable_walk_next(&hti))) {
                if (IS_ERR(he)) {
-                       if (PTR_ERR(he) != -EAGAIN)
-                               break;
+                       if (PTR_ERR(he) != -EAGAIN) {
+                               nft_trans_gc_destroy(gc);
+                               gc = NULL;
+                               goto try_later;
+                       }
                        continue;
                }
 
+               /* Ruleset has been updated, try later. */
+               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+                       nft_trans_gc_destroy(gc);
+                       gc = NULL;
+                       goto try_later;
+               }
+
+               if (nft_set_elem_is_dead(&he->ext))
+                       goto dead_elem;
+
                if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
                    nft_rhash_expr_needs_gc_run(set, &he->ext))
                        goto needs_gc_run;
@@ -340,26 +363,26 @@ static void nft_rhash_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&he->ext))
                        continue;
 needs_gc_run:
-               if (nft_set_elem_mark_busy(&he->ext))
-                       continue;
+               nft_set_elem_dead(&he->ext);
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
 
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb == NULL)
-                       break;
-               rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, he);
+               nft_trans_gc_elem_add(gc, he);
        }
+
+       gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+       /* catchall list iteration requires rcu read side lock. */
        rhashtable_walk_stop(&hti);
        rhashtable_walk_exit(&hti);
 
-       he = nft_set_catchall_gc(set);
-       if (he) {
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb)
-                       nft_set_gc_batch_add(gcb, he);
-       }
-       nft_set_gc_batch_complete(gcb);
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
+
+done:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
@@ -394,7 +417,7 @@ static int nft_rhash_init(const struct nft_set *set,
                return err;
 
        INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
-       if (set->flags & NFT_SET_TIMEOUT)
+       if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
                nft_rhash_gc_init(set);
 
        return 0;
@@ -422,7 +445,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
        };
 
        cancel_delayed_work_sync(&priv->gc_work);
-       rcu_barrier();
        rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
                                    (void *)&rhash_ctx);
 }
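
[Editor's sketch] The rhash GC worker above (and the rbtree one further down) now snapshots nft_net->gc_seq before walking and abandons the walk whenever a concurrent transaction bumps it. A stripped-down sketch of that idiom, with illustrative names and C11 atomics standing in for READ_ONCE():

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Async GC snapshots a generation counter and aborts the walk if a
     * concurrent ruleset update bumped it, leaving the work for the
     * next scheduled run. */
    static atomic_uint gc_seq;

    static bool gc_walk(const int *items, int n)
    {
            unsigned int snap = atomic_load(&gc_seq);

            for (int i = 0; i < n; i++) {
                    if (atomic_load(&gc_seq) != snap)
                            return false;   /* ruleset changed: try later */
                    printf("collect %d\n", items[i]);
            }
            return true;
    }

    int main(void)
    {
            int items[] = { 1, 2, 3 };

            return gc_walk(items, 3) ? 0 : 1;
    }
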
index db526cb..6af9c9e 100644 (file)
@@ -566,8 +566,9 @@ next_match:
                        goto out;
 
                if (last) {
-                       if (nft_set_elem_expired(&f->mt[b].e->ext) ||
-                           (genmask &&
+                       if (nft_set_elem_expired(&f->mt[b].e->ext))
+                               goto next_match;
+                       if ((genmask &&
                             !nft_set_elem_active(&f->mt[b].e->ext, genmask)))
                                goto next_match;
 
@@ -602,7 +603,7 @@ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
                            const struct nft_set_elem *elem, unsigned int flags)
 {
        return pipapo_get(net, set, (const u8 *)elem->key.val.data,
-                         nft_genmask_cur(net));
+                        nft_genmask_cur(net));
 }
 
 /**
@@ -901,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
 static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
                         int mask_bits)
 {
-       int rule = f->rules++, group, ret, bit_offset = 0;
+       int rule = f->rules, group, ret, bit_offset = 0;
 
-       ret = pipapo_resize(f, f->rules - 1, f->rules);
+       ret = pipapo_resize(f, f->rules, f->rules + 1);
        if (ret)
                return ret;
 
+       f->rules++;
+
        for (group = 0; group < f->groups; group++) {
                int i, v;
                u8 mask;
@@ -1051,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
                        step++;
                        if (step >= len) {
                                if (!masks) {
-                                       pipapo_insert(f, base, 0);
+                                       err = pipapo_insert(f, base, 0);
+                                       if (err < 0)
+                                               return err;
                                        masks = 1;
                                }
                                goto out;
@@ -1234,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
                else
                        ret = pipapo_expand(f, start, end, f->groups * f->bb);
 
+               if (ret < 0)
+                       return ret;
+
                if (f->bsize > bsize_max)
                        bsize_max = f->bsize;
 
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
        }
 }
 
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+                                    struct nft_pipapo_elem *e)
+
+{
+       struct nft_set_elem elem = {
+               .priv   = e,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+}
+
 /**
  * pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @set:       nftables API set representation
+ * @_set:      nftables API set representation
  * @m:         Matching data
  */
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
 {
+       struct nft_set *set = (struct nft_set *) _set;
        struct nft_pipapo *priv = nft_set_priv(set);
+       struct net *net = read_pnet(&set->net);
        int rules_f0, first_rule = 0;
        struct nft_pipapo_elem *e;
+       struct nft_trans_gc *gc;
+
+       gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+       if (!gc)
+               return;
 
        while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
                union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
                f--;
                i--;
                e = f->mt[rulemap[i].to].e;
-               if (nft_set_elem_expired(&e->ext) &&
-                   !nft_set_elem_mark_busy(&e->ext)) {
+
+               /* synchronous gc never fails, there is no need to set
+                * NFT_SET_ELEM_DEAD_BIT.
+                */
+               if (nft_set_elem_expired(&e->ext)) {
                        priv->dirty = true;
-                       pipapo_drop(m, rulemap);
 
-                       rcu_barrier();
-                       nft_set_elem_destroy(set, e, true);
+                       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+                       if (!gc)
+                               break;
+
+                       nft_pipapo_gc_deactivate(net, set, e);
+                       pipapo_drop(m, rulemap);
+                       nft_trans_gc_elem_add(gc, e);
 
                        /* And check again current first rule, which is now the
                         * first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
                }
        }
 
-       e = nft_set_catchall_gc(set);
-       if (e)
-               nft_set_elem_destroy(set, e, true);
-
-       priv->last_gc = jiffies;
+       gc = nft_trans_gc_catchall(gc, 0);
+       if (gc) {
+               nft_trans_gc_queue_sync_done(gc);
+               priv->last_gc = jiffies;
+       }
 }
 
 /**
@@ -1664,6 +1697,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
        priv->clone = new_clone;
 }
 
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+       const struct net *net = read_pnet(&set->net);
+
+       return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+       return true;
+#endif
+}
+
 static void nft_pipapo_abort(const struct nft_set *set)
 {
        struct nft_pipapo *priv = nft_set_priv(set);
@@ -1672,7 +1716,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
        if (!priv->dirty)
                return;
 
-       m = rcu_dereference(priv->match);
+       m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
 
        new_clone = pipapo_clone(m);
        if (IS_ERR(new_clone))
@@ -1699,14 +1743,9 @@ static void nft_pipapo_activate(const struct net *net,
                                const struct nft_set *set,
                                const struct nft_set_elem *elem)
 {
-       struct nft_pipapo_elem *e;
-
-       e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
-       if (IS_ERR(e))
-               return;
+       struct nft_pipapo_elem *e = elem->priv;
 
        nft_set_elem_change_active(net, set, &e->ext);
-       nft_set_elem_clear_busy(&e->ext);
 }
 
 /**
@@ -1918,10 +1957,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 
        data = (const u8 *)nft_set_ext_key(&e->ext);
 
-       e = pipapo_get(net, set, data, 0);
-       if (IS_ERR(e))
-               return;
-
        while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
                union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
                const u8 *match_start, *match_end;
@@ -1929,7 +1964,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
                int i, start, rules_fx;
 
                match_start = data;
-               match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+
+               if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
+                       match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+               else
+                       match_end = data;
 
                start = first_rule;
                rules_fx = rules_f0;
@@ -2001,8 +2040,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
                        goto cont;
 
                e = f->mt[r].e;
-               if (nft_set_elem_expired(&e->ext))
-                       goto cont;
 
                elem.priv = e;
 
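
[Editor's sketch] nft_pipapo_abort() runs under the nftables commit mutex, so the hunk above switches a plain rcu_dereference() to rcu_dereference_protected() gated by a lockdep check that compiles away without CONFIG_PROVE_LOCKING. A hypothetical userspace analogue of that guard (assumes glibc, where pthread_t is an integer type):

    #include <assert.h>
    #include <pthread.h>
    #include <stdbool.h>

    static pthread_mutex_t commit_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_t commit_owner;

    static void commit_lock(void)
    {
            pthread_mutex_lock(&commit_mutex);
            commit_owner = pthread_self();
    }

    static void commit_unlock(void)
    {
            commit_owner = 0;
            pthread_mutex_unlock(&commit_mutex);
    }

    #ifdef PROVE_LOCKING
    static bool commit_mutex_held(void)
    {
            return pthread_equal(commit_owner, pthread_self());
    }
    #else
    static bool commit_mutex_held(void)
    {
            return true;    /* debugging off: the assertion is a no-op */
    }
    #endif

    int main(void)
    {
            commit_lock();
            assert(commit_mutex_held());    /* rcu_dereference_protected() site */
            commit_unlock();
            return 0;
    }
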
index 5c05c9b..c6435e7 100644 (file)
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
                      set->klen);
 }
 
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+       return nft_set_elem_expired(&rbe->ext) ||
+              nft_set_elem_is_dead(&rbe->ext);
+}
+
 static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
                                const u32 *key, const struct nft_set_ext **ext,
                                unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
                                continue;
                        }
 
-                       if (nft_set_elem_expired(&rbe->ext))
+                       if (nft_rbtree_elem_expired(rbe))
                                return false;
 
                        if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
 
        if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
            nft_set_elem_active(&interval->ext, genmask) &&
-           !nft_set_elem_expired(&interval->ext) &&
+           !nft_rbtree_elem_expired(interval) &&
            nft_rbtree_interval_start(interval)) {
                *ext = &interval->ext;
                return true;
@@ -215,38 +221,70 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
        return rbe;
 }
 
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+                                struct nft_rbtree *priv,
+                                struct nft_rbtree_elem *rbe)
+{
+       struct nft_set_elem elem = {
+               .priv   = rbe,
+       };
+
+       nft_setelem_data_deactivate(net, set, &elem);
+       rb_erase(&rbe->node, &priv->root);
+}
+
 static int nft_rbtree_gc_elem(const struct nft_set *__set,
                              struct nft_rbtree *priv,
-                             struct nft_rbtree_elem *rbe)
+                             struct nft_rbtree_elem *rbe,
+                             u8 genmask)
 {
        struct nft_set *set = (struct nft_set *)__set;
        struct rb_node *prev = rb_prev(&rbe->node);
-       struct nft_rbtree_elem *rbe_prev = NULL;
-       struct nft_set_gc_batch *gcb;
+       struct net *net = read_pnet(&set->net);
+       struct nft_rbtree_elem *rbe_prev;
+       struct nft_trans_gc *gc;
 
-       gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
-       if (!gcb)
+       gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+       if (!gc)
                return -ENOMEM;
 
-       /* search for expired end interval coming before this element. */
+       /* search for the end interval coming before this element.
+        * end intervals don't carry a timeout extension; they
+        * are coupled with the interval start element.
+        */
        while (prev) {
                rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
-               if (nft_rbtree_interval_end(rbe_prev))
+               if (nft_rbtree_interval_end(rbe_prev) &&
+                   nft_set_elem_active(&rbe_prev->ext, genmask))
                        break;
 
                prev = rb_prev(prev);
        }
 
-       if (rbe_prev) {
-               rb_erase(&rbe_prev->node, &priv->root);
-               atomic_dec(&set->nelems);
+       if (prev) {
+               rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+               nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+               /* There is always room in this trans gc for this element:
+                * memory allocation never actually happens, hence the WARN
+                * splat in that case. No need to set NFT_SET_ELEM_DEAD_BIT,
+                * this is synchronous gc which never fails.
+                */
+               gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+               if (WARN_ON_ONCE(!gc))
+                       return -ENOMEM;
+
+               nft_trans_gc_elem_add(gc, rbe_prev);
        }
 
-       rb_erase(&rbe->node, &priv->root);
-       atomic_dec(&set->nelems);
+       nft_rbtree_gc_remove(net, set, priv, rbe);
+       gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+       if (WARN_ON_ONCE(!gc))
+               return -ENOMEM;
 
-       nft_set_gc_batch_add(gcb, rbe);
-       nft_set_gc_batch_complete(gcb);
+       nft_trans_gc_elem_add(gc, rbe);
+
+       nft_trans_gc_queue_sync_done(gc);
 
        return 0;
 }
@@ -321,7 +359,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
 
                /* perform garbage collection to avoid bogus overlap reports. */
                if (nft_set_elem_expired(&rbe->ext)) {
-                       err = nft_rbtree_gc_elem(set, priv, rbe);
+                       err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
                        if (err < 0)
                                return err;
 
@@ -474,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
        struct nft_rbtree_elem *rbe = elem->priv;
 
        nft_set_elem_change_active(net, set, &rbe->ext);
-       nft_set_elem_clear_busy(&rbe->ext);
 }
 
 static bool nft_rbtree_flush(const struct net *net,
@@ -482,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
 {
        struct nft_rbtree_elem *rbe = priv;
 
-       if (!nft_set_elem_mark_busy(&rbe->ext) ||
-           !nft_is_active(net, &rbe->ext)) {
-               nft_set_elem_change_active(net, set, &rbe->ext);
-               return true;
-       }
-       return false;
+       nft_set_elem_change_active(net, set, &rbe->ext);
+
+       return true;
 }
 
 static void *nft_rbtree_deactivate(const struct net *net,
@@ -544,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (nft_set_elem_expired(&rbe->ext))
-                       goto cont;
                if (!nft_set_elem_active(&rbe->ext, iter->genmask))
                        goto cont;
 
@@ -564,26 +596,43 @@ cont:
 
 static void nft_rbtree_gc(struct work_struct *work)
 {
-       struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
-       struct nft_set_gc_batch *gcb = NULL;
+       struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+       struct nftables_pernet *nft_net;
        struct nft_rbtree *priv;
+       struct nft_trans_gc *gc;
        struct rb_node *node;
        struct nft_set *set;
+       unsigned int gc_seq;
        struct net *net;
-       u8 genmask;
 
        priv = container_of(work, struct nft_rbtree, gc_work.work);
        set  = nft_set_container_of(priv);
        net  = read_pnet(&set->net);
-       genmask = nft_genmask_cur(net);
+       nft_net = nft_pernet(net);
+       gc_seq  = READ_ONCE(nft_net->gc_seq);
+
+       if (nft_set_gc_is_pending(set))
+               goto done;
+
+       gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+       if (!gc)
+               goto done;
 
        write_lock_bh(&priv->lock);
        write_seqcount_begin(&priv->count);
        for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+               /* Ruleset has been updated, try later. */
+               if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+                       nft_trans_gc_destroy(gc);
+                       gc = NULL;
+                       goto try_later;
+               }
+
                rbe = rb_entry(node, struct nft_rbtree_elem, node);
 
-               if (!nft_set_elem_active(&rbe->ext, genmask))
-                       continue;
+               if (nft_set_elem_is_dead(&rbe->ext))
+                       goto dead_elem;
 
                /* elements are reversed in the rbtree for historical reasons,
                 * from highest to lowest value, that is why end element is
@@ -596,46 +645,36 @@ static void nft_rbtree_gc(struct work_struct *work)
                if (!nft_set_elem_expired(&rbe->ext))
                        continue;
 
-               if (nft_set_elem_mark_busy(&rbe->ext)) {
-                       rbe_end = NULL;
+               nft_set_elem_dead(&rbe->ext);
+
+               if (!rbe_end)
                        continue;
-               }
 
-               if (rbe_prev) {
-                       rb_erase(&rbe_prev->node, &priv->root);
-                       rbe_prev = NULL;
-               }
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (!gcb)
-                       break;
+               nft_set_elem_dead(&rbe_end->ext);
 
-               atomic_dec(&set->nelems);
-               nft_set_gc_batch_add(gcb, rbe);
-               rbe_prev = rbe;
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
 
-               if (rbe_end) {
-                       atomic_dec(&set->nelems);
-                       nft_set_gc_batch_add(gcb, rbe_end);
-                       rb_erase(&rbe_end->node, &priv->root);
-                       rbe_end = NULL;
-               }
-               node = rb_next(node);
-               if (!node)
-                       break;
+               nft_trans_gc_elem_add(gc, rbe_end);
+               rbe_end = NULL;
+dead_elem:
+               gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+               if (!gc)
+                       goto try_later;
+
+               nft_trans_gc_elem_add(gc, rbe);
        }
-       if (rbe_prev)
-               rb_erase(&rbe_prev->node, &priv->root);
+
+       gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
        write_seqcount_end(&priv->count);
        write_unlock_bh(&priv->lock);
 
-       rbe = nft_set_catchall_gc(set);
-       if (rbe) {
-               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
-               if (gcb)
-                       nft_set_gc_batch_add(gcb, rbe);
-       }
-       nft_set_gc_batch_complete(gcb);
-
+       if (gc)
+               nft_trans_gc_queue_async_done(gc);
+done:
        queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
                           nft_set_gc_interval(set));
 }
index 84def74..9ed85be 100644 (file)
@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
                break;
        case NFT_SOCKET_MARK:
                if (sk_fullsock(sk)) {
-                       *dest = sk->sk_mark;
+                       *dest = READ_ONCE(sk->sk_mark);
                } else {
                        regs->verdict.code = NFT_BREAK;
                        return;
index 7013f55..76e01f2 100644 (file)
@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 
                if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
                    transparent && sk_fullsock(sk))
-                       pskb->mark = sk->sk_mark;
+                       pskb->mark = READ_ONCE(sk->sk_mark);
 
                if (sk != skb->sk)
                        sock_gen_put(sk);
@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 
                if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
                    transparent && sk_fullsock(sk))
-                       pskb->mark = sk->sk_mark;
+                       pskb->mark = READ_ONCE(sk->sk_mark);
 
                if (sk != skb->sk)
                        sock_gen_put(sk);
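
[Editor's sketch] sk->sk_mark (and sk_priority, sk_rcvtimeo below) can be updated locklessly via setsockopt(), so the reads above gain READ_ONCE() annotations paired with WRITE_ONCE() on the store side. In userspace C11 terms the pairing looks roughly like this (relaxed atomics, illustrative names):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Relaxed atomics make the racy load/store well-defined without
     * adding ordering or locking, just like READ_ONCE()/WRITE_ONCE(). */
    static _Atomic uint32_t sk_mark;

    static void set_mark(uint32_t mark)     /* setsockopt side */
    {
            atomic_store_explicit(&sk_mark, mark, memory_order_relaxed);
    }

    static uint32_t get_mark(void)          /* lockless fast-path reader */
    {
            return atomic_load_explicit(&sk_mark, memory_order_relaxed);
    }

    int main(void)
    {
            set_mark(42);
            return get_mark() == 42 ? 0 : 1;
    }
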
index a6d2a0b..3d7a91e 100644 (file)
@@ -1829,7 +1829,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
        parms.port_no = OVSP_LOCAL;
        parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
        parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
-               ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+               ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
 
        /* So far only local changes have been made, now need the lock. */
        ovs_lock();
@@ -2049,7 +2049,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
        [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
        [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
                PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
-       [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
+       [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
 };
 
 static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2302,7 +2302,7 @@ restart:
        parms.port_no = port_no;
        parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
        parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
-               ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+               ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
 
        vport = new_vport(&parms);
        err = PTR_ERR(vport);
@@ -2539,7 +2539,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
        [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
        [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
        [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
-       [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+       [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
        [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
        [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
 };
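
[Editor's sketch] The openvswitch hunks treat OVS_DP_ATTR_IFINDEX/OVS_VPORT_ATTR_IFINDEX as the signed values they are in the UAPI and reject negatives at policy-check time. Written out by hand, NLA_POLICY_MIN(NLA_S32, 0) amounts to roughly this (hypothetical helper):

    #include <stdint.h>
    #include <stdio.h>

    /* An ifindex like 0x80000000 is a huge number when misread as u32,
     * but negative in the s32 view, and is now rejected at parse time. */
    static int validate_ifindex(uint32_t raw)
    {
            int32_t val = (int32_t)raw;     /* the nla_get_s32() view */

            return val < 0 ? -1 : 0;        /* kernel would return -ERANGE */
    }

    int main(void)
    {
            printf("%d\n", validate_ifindex(0x80000000u));  /* -1: rejected */
            printf("%d\n", validate_ifindex(42));           /*  0: accepted */
            return 0;
    }
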
index 85ff90a..a2935bd 100644 (file)
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 {
        union tpacket_uhdr h;
 
+       /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
-               h.h1->tp_status = status;
+               WRITE_ONCE(h.h1->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h1->tp_status));
                break;
        case TPACKET_V2:
-               h.h2->tp_status = status;
+               WRITE_ONCE(h.h2->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h2->tp_status));
                break;
        case TPACKET_V3:
-               h.h3->tp_status = status;
+               WRITE_ONCE(h.h3->tp_status, status);
                flush_dcache_page(pgv_to_page(&h.h3->tp_status));
                break;
        default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
 
        smp_rmb();
 
+       /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
        h.raw = frame;
        switch (po->tp_version) {
        case TPACKET_V1:
                flush_dcache_page(pgv_to_page(&h.h1->tp_status));
-               return h.h1->tp_status;
+               return READ_ONCE(h.h1->tp_status);
        case TPACKET_V2:
                flush_dcache_page(pgv_to_page(&h.h2->tp_status));
-               return h.h2->tp_status;
+               return READ_ONCE(h.h2->tp_status);
        case TPACKET_V3:
                flush_dcache_page(pgv_to_page(&h.h3->tp_status));
-               return h.h3->tp_status;
+               return READ_ONCE(h.h3->tp_status);
        default:
                WARN(1, "TPACKET version not supported.\n");
                BUG();
@@ -2050,8 +2054,8 @@ retry:
 
        skb->protocol = proto;
        skb->dev = dev;
-       skb->priority = sk->sk_priority;
-       skb->mark = sk->sk_mark;
+       skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = READ_ONCE(sk->sk_mark);
        skb->tstamp = sockc.transmit_time;
 
        skb_setup_tx_timestamp(skb, sockc.tsflags);
@@ -2585,8 +2589,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 
        skb->protocol = proto;
        skb->dev = dev;
-       skb->priority = po->sk.sk_priority;
-       skb->mark = po->sk.sk_mark;
+       skb->priority = READ_ONCE(po->sk.sk_priority);
+       skb->mark = READ_ONCE(po->sk.sk_mark);
        skb->tstamp = sockc->transmit_time;
        skb_setup_tx_timestamp(skb, sockc->tsflags);
        skb_zcopy_set_nouarg(skb, ph.raw);
@@ -2988,7 +2992,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
                goto out_unlock;
 
        sockcm_init(&sockc, sk);
-       sockc.mark = sk->sk_mark;
+       sockc.mark = READ_ONCE(sk->sk_mark);
        if (msg->msg_controllen) {
                err = sock_cmsg_send(sk, msg, &sockc);
                if (unlikely(err))
@@ -3061,7 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
        skb->protocol = proto;
        skb->dev = dev;
-       skb->priority = sk->sk_priority;
+       skb->priority = READ_ONCE(sk->sk_priority);
        skb->mark = sockc.mark;
        skb->tstamp = sockc.transmit_time;
 
@@ -3601,7 +3605,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
        if (dev) {
                sll->sll_hatype = dev->type;
                sll->sll_halen = dev->addr_len;
-               memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
+               memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len);
        } else {
                sll->sll_hatype = 0;    /* Bad: we have no ARPHRD_UNSPEC */
                sll->sll_halen = 0;
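
[Editor's sketch] The packet_getname() hunk copies into sll_addr_flex, a flexible-array alias of the fixed 8-byte sll_addr, so fortified memcpy() accepts hardware addresses longer than 8 bytes (callers pass sockaddr_storage, so the bytes really exist). A GNU C sketch of the aliasing trick, mirroring what DECLARE_FLEX_ARRAY() expands to (names and sizes illustrative):

    #include <stdio.h>
    #include <string.h>

    struct ll {
            unsigned short hatype;
            union {
                    unsigned char addr[8];          /* UAPI-visible size */
                    struct {
                            struct { } pad;         /* keeps the flex array legal */
                            unsigned char addr_flex[];
                    };
            };
    };

    union backing {
            struct ll ll;
            unsigned char buf[64];  /* plays the role of sockaddr_storage */
    };

    int main(void)
    {
            union backing b = { 0 };
            unsigned char hw[20] = { 0xaa };        /* e.g. InfiniBand-sized */

            memcpy(b.ll.addr_flex, hw, sizeof(hw)); /* no fortify complaint */
            printf("%02x\n", b.ll.addr_flex[0]);
            return 0;
    }
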
index f7887f4..9d3f26b 100644 (file)
@@ -1320,7 +1320,7 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
                        return ERR_PTR(err);
                }
        } else {
-               if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) {
+               if (strscpy(act_name, "police", IFNAMSIZ) < 0) {
                        NL_SET_ERR_MSG(extack, "TC action name too long");
                        return ERR_PTR(-EINVAL);
                }
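
[Editor's sketch] The act_api hunk is part of the tree-wide strlcpy() removal: strlcpy() returns the source length (>= size means truncation), while strscpy() returns -E2BIG on truncation, hence the new `< 0` test. A hypothetical userspace rendering of those semantics:

    #include <stddef.h>
    #include <string.h>

    #define E2BIG 7         /* Linux errno value */

    /* Copy up to size-1 bytes, always NUL-terminate, return the length
     * copied or -E2BIG when the source did not fit. */
    static long my_strscpy(char *dst, const char *src, size_t size)
    {
            size_t len = strnlen(src, size);

            if (size == 0)
                    return -E2BIG;
            if (len == size) {              /* truncated */
                    memcpy(dst, src, size - 1);
                    dst[size - 1] = '\0';
                    return -E2BIG;
            }
            memcpy(dst, src, len + 1);
            return (long)len;
    }

    int main(void)
    {
            char buf[8];

            return my_strscpy(buf, "police", sizeof(buf)) < 0;  /* fits: 0 */
    }
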
index 466c26d..382c7a7 100644 (file)
@@ -406,56 +406,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
        return 0;
 }
 
-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
-                            struct cls_bpf_prog *prog, unsigned long base,
-                            struct nlattr **tb, struct nlattr *est, u32 flags,
-                            struct netlink_ext_ack *extack)
-{
-       bool is_bpf, is_ebpf, have_exts = false;
-       u32 gen_flags = 0;
-       int ret;
-
-       is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
-       is_ebpf = tb[TCA_BPF_FD];
-       if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
-               return -EINVAL;
-
-       ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
-                               extack);
-       if (ret < 0)
-               return ret;
-
-       if (tb[TCA_BPF_FLAGS]) {
-               u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
-
-               if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
-                       return -EINVAL;
-
-               have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
-       }
-       if (tb[TCA_BPF_FLAGS_GEN]) {
-               gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
-               if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
-                   !tc_flags_valid(gen_flags))
-                       return -EINVAL;
-       }
-
-       prog->exts_integrated = have_exts;
-       prog->gen_flags = gen_flags;
-
-       ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
-                      cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
-       if (ret < 0)
-               return ret;
-
-       if (tb[TCA_BPF_CLASSID]) {
-               prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
-               tcf_bind_filter(tp, &prog->res, base);
-       }
-
-       return 0;
-}
-
 static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct tcf_proto *tp, unsigned long base,
                          u32 handle, struct nlattr **tca,
@@ -463,9 +413,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                          struct netlink_ext_ack *extack)
 {
        struct cls_bpf_head *head = rtnl_dereference(tp->root);
+       bool is_bpf, is_ebpf, have_exts = false;
        struct cls_bpf_prog *oldprog = *arg;
        struct nlattr *tb[TCA_BPF_MAX + 1];
+       bool bound_to_filter = false;
        struct cls_bpf_prog *prog;
+       u32 gen_flags = 0;
        int ret;
 
        if (tca[TCA_OPTIONS] == NULL)
@@ -504,11 +457,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
                goto errout;
        prog->handle = handle;
 
-       ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
-                               extack);
+       is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+       is_ebpf = tb[TCA_BPF_FD];
+       if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+               ret = -EINVAL;
+               goto errout_idr;
+       }
+
+       ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
+                               flags, extack);
+       if (ret < 0)
+               goto errout_idr;
+
+       if (tb[TCA_BPF_FLAGS]) {
+               u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+               if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+                       ret = -EINVAL;
+                       goto errout_idr;
+               }
+
+               have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+       }
+       if (tb[TCA_BPF_FLAGS_GEN]) {
+               gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+               if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+                   !tc_flags_valid(gen_flags)) {
+                       ret = -EINVAL;
+                       goto errout_idr;
+               }
+       }
+
+       prog->exts_integrated = have_exts;
+       prog->gen_flags = gen_flags;
+
+       ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+               cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
        if (ret < 0)
                goto errout_idr;
 
+       if (tb[TCA_BPF_CLASSID]) {
+               prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+               tcf_bind_filter(tp, &prog->res, base);
+               bound_to_filter = true;
+       }
+
        ret = cls_bpf_offload(tp, prog, oldprog, extack);
        if (ret)
                goto errout_parms;
@@ -530,6 +523,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
        return 0;
 
 errout_parms:
+       if (bound_to_filter)
+               tcf_unbind_filter(tp, &prog->res);
        cls_bpf_free_parms(prog);
 errout_idr:
        if (!oldprog)
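
[Editor's sketch] The cls_bpf change above (and the matching cls_flower, cls_fw, cls_matchall and cls_u32 hunks below) folds the *_set_parms() helpers into their callers so that a successful tcf_bind_filter() can be rolled back if a later step fails. The underlying unwind idiom, reduced to a sketch (illustrative names, not kernel APIs):

    #include <stdbool.h>
    #include <stdio.h>

    static int validate(void) { return 0; }
    static void bind_res(void) { }
    static void unbind_res(void) { }
    static int offload(void) { return -1; } /* simulate a late failure */

    /* Remember which side effects already happened; undo them in
     * reverse order on the failure path. */
    static int change(void)
    {
            bool bound = false;
            int err;

            err = validate();
            if (err)
                    goto out;

            bind_res();             /* side effect that must be undone */
            bound = true;

            err = offload();        /* can fail after the bind happened */
            if (err)
                    goto unwind;

            return 0;

    unwind:
            if (bound)
                    unbind_res();
    out:
            return err;
    }

    int main(void)
    {
            printf("%d\n", change());       /* -1, with the bind rolled back */
            return 0;
    }
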
index 56065cc..9f0711d 100644 (file)
@@ -776,7 +776,8 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
        [TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]    = { .type = NLA_U32 },
 };
 
-static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = {
+static const struct nla_policy
+cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX + 1] = {
        [TCA_FLOWER_KEY_CFM_MD_LEVEL]   = NLA_POLICY_MAX(NLA_U8,
                                                FLOW_DIS_CFM_MDL_MAX),
        [TCA_FLOWER_KEY_CFM_OPCODE]     = { .type = NLA_U8 },
@@ -812,6 +813,16 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
                       TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_range.tp_max.src,
                       TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
 
+       if (mask->tp_range.tp_min.dst != mask->tp_range.tp_max.dst) {
+               NL_SET_ERR_MSG(extack,
+                              "Both min and max destination ports must be specified");
+               return -EINVAL;
+       }
+       if (mask->tp_range.tp_min.src != mask->tp_range.tp_max.src) {
+               NL_SET_ERR_MSG(extack,
+                              "Both min and max source ports must be specified");
+               return -EINVAL;
+       }
        if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst &&
            ntohs(key->tp_range.tp_max.dst) <=
            ntohs(key->tp_range.tp_min.dst)) {
@@ -1699,7 +1710,7 @@ static int fl_set_key_cfm(struct nlattr **tb,
                          struct fl_flow_key *mask,
                          struct netlink_ext_ack *extack)
 {
-       struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX];
+       struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX + 1];
        int err;
 
        if (!tb[TCA_FLOWER_KEY_CFM])
@@ -2163,53 +2174,6 @@ static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask)
        return mask->meta.l2_miss;
 }
 
-static int fl_set_parms(struct net *net, struct tcf_proto *tp,
-                       struct cls_fl_filter *f, struct fl_flow_mask *mask,
-                       unsigned long base, struct nlattr **tb,
-                       struct nlattr *est,
-                       struct fl_flow_tmplt *tmplt,
-                       u32 flags, u32 fl_flags,
-                       struct netlink_ext_ack *extack)
-{
-       int err;
-
-       err = tcf_exts_validate_ex(net, tp, tb, est, &f->exts, flags,
-                                  fl_flags, extack);
-       if (err < 0)
-               return err;
-
-       if (tb[TCA_FLOWER_CLASSID]) {
-               f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
-               if (flags & TCA_ACT_FLAGS_NO_RTNL)
-                       rtnl_lock();
-               tcf_bind_filter(tp, &f->res, base);
-               if (flags & TCA_ACT_FLAGS_NO_RTNL)
-                       rtnl_unlock();
-       }
-
-       err = fl_set_key(net, tb, &f->key, &mask->key, extack);
-       if (err)
-               return err;
-
-       fl_mask_update_range(mask);
-       fl_set_masked_key(&f->mkey, &f->key, mask);
-
-       if (!fl_mask_fits_tmplt(tmplt, mask)) {
-               NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
-               return -EINVAL;
-       }
-
-       /* Enable tc skb extension if filter matches on data extracted from
-        * this extension.
-        */
-       if (fl_needs_tc_skb_ext(&mask->key)) {
-               f->needs_tc_skb_ext = 1;
-               tc_skb_ext_tc_enable();
-       }
-
-       return 0;
-}
-
 static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
                               struct cls_fl_filter *fold,
                               bool *in_ht)
@@ -2241,6 +2205,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        struct cls_fl_head *head = fl_head_dereference(tp);
        bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
        struct cls_fl_filter *fold = *arg;
+       bool bound_to_filter = false;
        struct cls_fl_filter *fnew;
        struct fl_flow_mask *mask;
        struct nlattr **tb;
@@ -2325,15 +2290,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
        if (err < 0)
                goto errout_idr;
 
-       err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
-                          tp->chain->tmplt_priv, flags, fnew->flags,
-                          extack);
-       if (err)
+       err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+                                  &fnew->exts, flags, fnew->flags,
+                                  extack);
+       if (err < 0)
                goto errout_idr;
 
+       if (tb[TCA_FLOWER_CLASSID]) {
+               fnew->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
+                       rtnl_lock();
+               tcf_bind_filter(tp, &fnew->res, base);
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
+                       rtnl_unlock();
+               bound_to_filter = true;
+       }
+
+       err = fl_set_key(net, tb, &fnew->key, &mask->key, extack);
+       if (err)
+               goto unbind_filter;
+
+       fl_mask_update_range(mask);
+       fl_set_masked_key(&fnew->mkey, &fnew->key, mask);
+
+       if (!fl_mask_fits_tmplt(tp->chain->tmplt_priv, mask)) {
+               NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+               err = -EINVAL;
+               goto unbind_filter;
+       }
+
+       /* Enable tc skb extension if filter matches on data extracted from
+        * this extension.
+        */
+       if (fl_needs_tc_skb_ext(&mask->key)) {
+               fnew->needs_tc_skb_ext = 1;
+               tc_skb_ext_tc_enable();
+       }
+
        err = fl_check_assign_mask(head, fnew, fold, mask);
        if (err)
-               goto errout_idr;
+               goto unbind_filter;
 
        err = fl_ht_insert_unique(fnew, fold, &in_ht);
        if (err)
@@ -2424,6 +2420,16 @@ errout_hw:
                                       fnew->mask->filter_ht_params);
 errout_mask:
        fl_mask_put(head, fnew->mask);
+
+unbind_filter:
+       if (bound_to_filter) {
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
+                       rtnl_lock();
+               tcf_unbind_filter(tp, &fnew->res);
+               if (flags & TCA_ACT_FLAGS_NO_RTNL)
+                       rtnl_unlock();
+       }
+
 errout_idr:
        if (!fold)
                idr_remove(&head->handle_idr, fnew->handle);
index ae9439a..c49d6af 100644 (file)
@@ -212,11 +212,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
        if (err < 0)
                return err;
 
-       if (tb[TCA_FW_CLASSID]) {
-               f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
-               tcf_bind_filter(tp, &f->res, base);
-       }
-
        if (tb[TCA_FW_INDEV]) {
                int ret;
                ret = tcf_change_indev(net, tb[TCA_FW_INDEV], extack);
@@ -233,6 +228,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
        } else if (head->mask != 0xFFFFFFFF)
                return err;
 
+       if (tb[TCA_FW_CLASSID]) {
+               f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+               tcf_bind_filter(tp, &f->res, base);
+       }
+
        return 0;
 }
 
@@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
                        return -ENOBUFS;
 
                fnew->id = f->id;
-               fnew->res = f->res;
                fnew->ifindex = f->ifindex;
                fnew->tp = f->tp;
 
index fa3bbd1..c4ed11d 100644 (file)
@@ -159,26 +159,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
        [TCA_MATCHALL_FLAGS]            = { .type = NLA_U32 },
 };
 
-static int mall_set_parms(struct net *net, struct tcf_proto *tp,
-                         struct cls_mall_head *head,
-                         unsigned long base, struct nlattr **tb,
-                         struct nlattr *est, u32 flags, u32 fl_flags,
-                         struct netlink_ext_ack *extack)
-{
-       int err;
-
-       err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags,
-                                  fl_flags, extack);
-       if (err < 0)
-               return err;
-
-       if (tb[TCA_MATCHALL_CLASSID]) {
-               head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
-               tcf_bind_filter(tp, &head->res, base);
-       }
-       return 0;
-}
-
 static int mall_change(struct net *net, struct sk_buff *in_skb,
                       struct tcf_proto *tp, unsigned long base,
                       u32 handle, struct nlattr **tca,
@@ -187,6 +167,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
 {
        struct cls_mall_head *head = rtnl_dereference(tp->root);
        struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+       bool bound_to_filter = false;
        struct cls_mall_head *new;
        u32 userflags = 0;
        int err;
@@ -226,11 +207,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
                goto err_alloc_percpu;
        }
 
-       err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE],
-                            flags, new->flags, extack);
-       if (err)
+       err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+                                  &new->exts, flags, new->flags, extack);
+       if (err < 0)
                goto err_set_parms;
 
+       if (tb[TCA_MATCHALL_CLASSID]) {
+               new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+               tcf_bind_filter(tp, &new->res, base);
+               bound_to_filter = true;
+       }
+
        if (!tc_skip_hw(new->flags)) {
                err = mall_replace_hw_filter(tp, new, (unsigned long)new,
                                             extack);
@@ -246,6 +233,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
        return 0;
 
 err_replace_hw_filter:
+       if (bound_to_filter)
+               tcf_unbind_filter(tp, &new->res);
 err_set_parms:
        free_percpu(new->pf);
 err_alloc_percpu:
index d0c5372..1e20bbd 100644 (file)
@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
        if (fold) {
                f->id = fold->id;
                f->iif = fold->iif;
-               f->res = fold->res;
                f->handle = fold->handle;
 
                f->tp = fold->tp;
index d15d50d..da4c179 100644 (file)
@@ -712,8 +712,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
        [TCA_U32_FLAGS]         = { .type = NLA_U32 },
 };
 
+static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+                             struct nlattr **tb)
+{
+       if (tb[TCA_U32_CLASSID])
+               tcf_unbind_filter(tp, &n->res);
+}
+
+static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+                           unsigned long base, struct nlattr **tb)
+{
+       if (tb[TCA_U32_CLASSID]) {
+               n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+               tcf_bind_filter(tp, &n->res, base);
+       }
+}
+
 static int u32_set_parms(struct net *net, struct tcf_proto *tp,
-                        unsigned long base,
                         struct tc_u_knode *n, struct nlattr **tb,
                         struct nlattr *est, u32 flags, u32 fl_flags,
                         struct netlink_ext_ack *extack)
@@ -760,10 +775,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
                if (ht_old)
                        ht_old->refcnt--;
        }
-       if (tb[TCA_U32_CLASSID]) {
-               n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
-               tcf_bind_filter(tp, &n->res, base);
-       }
 
        if (ifindex >= 0)
                n->ifindex = ifindex;
@@ -815,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
 
        new->ifindex = n->ifindex;
        new->fshift = n->fshift;
-       new->res = n->res;
        new->flags = n->flags;
        RCU_INIT_POINTER(new->ht_down, ht);
 
@@ -903,17 +913,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                if (!new)
                        return -ENOMEM;
 
-               err = u32_set_parms(net, tp, base, new, tb,
-                                   tca[TCA_RATE], flags, new->flags,
-                                   extack);
+               err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
+                                   flags, new->flags, extack);
 
                if (err) {
                        __u32_destroy_key(new);
                        return err;
                }
 
+               u32_bind_filter(tp, new, base, tb);
+
                err = u32_replace_hw_knode(tp, new, flags, extack);
                if (err) {
+                       u32_unbind_filter(tp, new, tb);
+
+                       if (tb[TCA_U32_LINK]) {
+                               struct tc_u_hnode *ht_old;
+
+                               ht_old = rtnl_dereference(n->ht_down);
+                               if (ht_old)
+                                       ht_old->refcnt++;
+                       }
                        __u32_destroy_key(new);
                        return err;
                }
@@ -1003,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                return -EINVAL;
        }
 
+       /* At this point, we need to derive the new handle that will be used to
+        * uniquely map the identity of this table match entry. The
+        * identity of the entry that we need to construct is 32 bits made of:
+        *     htid(12b):bucketid(8b):node/entryid(12b)
+        *
+        * At this point _we have the table(ht)_ in which we will insert this
+        * entry. We carry the table's id in variable "htid".
+        * Note that earlier code picked the ht either from a) the htid the
+        * user provided via the TCA_U32_HASH attribute, or b) when no such
+        * attribute was passed, from the root ht, which defaults to ID
+        * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+        * If OTOH the user passed us the htid, they may also pass a bucketid
+        * of their choice (0 is fine). For example, a user htid of
+        * 0x[600][01][000] indicates hash bucketid 1. Rule: the entry/node ID
+        * _cannot_ be passed via the htid, so even if it is non-zero it is ignored.
+        *
+        * We may also have a handle, if the user passed one. The handle also
+        * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+        * Rule: the bucketid on the handle is ignored even if one was passed;
+        * rather the value on "htid" is always assumed to be the bucketid.
+        */
        if (handle) {
+               /* Rule: The htid from handle and tableid from htid must match */
                if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
                        NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
                        return -EINVAL;
                }
-               handle = htid | TC_U32_NODE(handle);
-               err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
-                                   GFP_KERNEL);
-               if (err)
-                       return err;
-       } else
+               /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
+                * need to finalize the table entry identification with the last
+                * part - the node/entryid(12b). Rule: the nodeid _cannot_ be 0
+                * for entries. Rule: a nodeid of 0 is reserved only for tables
+                * (see the earlier code which processes the TC_U32_DIVISOR
+                * attribute). Rule: the nodeid can only be derived from the
+                * handle (and not from the htid).
+                * Rule: if the handle specifies zero for the node id (for
+                * example 0x60000000), then pick a new nodeid from the pool of
+                * IDs this hash table has been allocating from.
+                * If OTOH it is specified (e.g. the user passed a handle such
+                * as 0x60000123), then we use it to generate our final handle,
+                * which is used to uniquely identify the match entry.
+                */
+               if (!TC_U32_NODE(handle)) {
+                       handle = gen_new_kid(ht, htid);
+               } else {
+                       handle = htid | TC_U32_NODE(handle);
+                       err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+                                           handle, GFP_KERNEL);
+                       if (err)
+                               return err;
+               }
+       } else {
+               /* The user did not give us a handle; let's just generate one
+                * from the table's pool of nodeids.
+                */
                handle = gen_new_kid(ht, htid);
+       }
 
        if (tb[TCA_U32_SEL] == NULL) {
                NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
@@ -1074,15 +1138,18 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
        }
 #endif
 
-       err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE],
+       err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
                            flags, n->flags, extack);
+
+       u32_bind_filter(tp, n, base, tb);
+
        if (err == 0) {
                struct tc_u_knode __rcu **ins;
                struct tc_u_knode *pins;
 
                err = u32_replace_hw_knode(tp, n, flags, extack);
                if (err)
-                       goto errhw;
+                       goto errunbind;
 
                if (!tc_in_hw(n->flags))
                        n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -1100,7 +1167,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
                return 0;
        }
 
-errhw:
+errunbind:
+       u32_unbind_filter(tp, n, tb);
+
 #ifdef CONFIG_CLS_U32_MARK
        free_percpu(n->pcpu_success);
 #endif
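
The comments in this hunk describe a 12:8:12 bit split of the 32-bit u32 handle. As a quick sanity check, here is a minimal userspace sketch (not part of the patch) that decodes a handle with the TC_U32_* macros; the definitions mirror include/uapi/linux/pkt_cls.h.

#include <stdio.h>
#include <stdint.h>

/* Mirrors include/uapi/linux/pkt_cls.h: htid(12b):bucketid(8b):nodeid(12b) */
#define TC_U32_HTID(h)	((h) & 0xFFF00000)
#define TC_U32_HASH(h)	(((h) >> 12) & 0xFF)
#define TC_U32_NODE(h)	((h) & 0xFFF)

int main(void)
{
	uint32_t handle = 0x60001123;	/* htid 0x600, bucketid 0x01, nodeid 0x123 */

	printf("htid   %#010x\n", TC_U32_HTID(handle));	/* 0x60000000 */
	printf("bucket %#x\n", TC_U32_HASH(handle));	/* 0x1 */
	printf("node   %#x\n", TC_U32_NODE(handle));	/* 0x123 */
	return 0;
}
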
index af85a73..6fdba06 100644 (file)
@@ -568,7 +568,7 @@ META_COLLECTOR(int_sk_rcvtimeo)
                *err = -1;
                return;
        }
-       dst->value = sk->sk_rcvtimeo / HZ;
+       dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ;
 }
 
 META_COLLECTOR(int_sk_sndtimeo)
@@ -579,7 +579,7 @@ META_COLLECTOR(int_sk_sndtimeo)
                *err = -1;
                return;
        }
-       dst->value = sk->sk_sndtimeo / HZ;
+       dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ;
 }
 
 META_COLLECTOR(int_sk_sendmsg_off)
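
This hunk, like several later ones (sctp_memory_pressure, sk->sk_mark, sk->sk_peek_off), converts plain accesses to fields that are read locklessly into READ_ONCE()/WRITE_ONCE() so the compiler cannot tear or refetch them. Below is a rough userspace model of the annotation pair; the kernel's real macros in asm-generic/rwonce.h do more work, so treat the volatile casts as an approximation only.

#include <stdio.h>

/* Approximation of the kernel's once-accessors: force a single,
 * untorn access through a volatile-qualified pointer. */
#define READ_ONCE(x)		(*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v)	(*(volatile __typeof__(x) *)&(x) = (v))

static long sk_rcvtimeo;

int main(void)
{
	WRITE_ONCE(sk_rcvtimeo, 250);			/* writer under lock */
	printf("%ld\n", READ_ONCE(sk_rcvtimeo));	/* lockless reader */
	return 0;
}
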
index aa6b1fe..e9eaf63 100644 (file)
@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
        return 0;
 }
 
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+       return (n->nlmsg_flags & NLM_F_CREATE &&
+               n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+       return (n->nlmsg_flags & NLM_F_CREATE &&
+               n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+       return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+               !(n->nlmsg_flags & NLM_F_REPLACE) &&
+               !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
 /*
  * Create/change qdisc.
  */
-
 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
                           struct netlink_ext_ack *extack)
 {
@@ -1644,27 +1662,35 @@ replay:
                                 *
                                 *   We know, that some child q is already
                                 *   attached to this parent and have choice:
-                                *   either to change it or to create/graft new one.
+                                *   1) change it or 2) create/graft new one.
+                                *   If the requested qdisc kind is different
+                                *   from the existing one, then we choose
+                                *   graft. If they are the same, then this
+                                *   is a "change" operation - just let it
+                                *   fall through.
                                 *
                                 *   1. We are allowed to create/graft only
-                                *   if CREATE and REPLACE flags are set.
+                                *   if the request is explicitly stating
+                                *   "please create if it doesn't exist".
                                 *
-                                *   2. If EXCL is set, requestor wanted to say,
-                                *   that qdisc tcm_handle is not expected
+                                *   2. If the request is an exclusive create,
+                                *   then the qdisc tcm_handle is not expected
                                 *   to exist, so that we choose create/graft too.
                                 *
                                 *   3. The last case is when no flags are set.
+                                *   This will happen when, for example, the tc
+                                *   utility issues a "change" command.
                                 *   Alas, it is sort of hole in API, we
                                 *   cannot decide what to do unambiguously.
-                                *   For now we select create/graft, if
-                                *   user gave KIND, which does not match existing.
+                                *   For now we select create/graft.
                                 */
-                               if ((n->nlmsg_flags & NLM_F_CREATE) &&
-                                   (n->nlmsg_flags & NLM_F_REPLACE) &&
-                                   ((n->nlmsg_flags & NLM_F_EXCL) ||
-                                    (tca[TCA_KIND] &&
-                                     nla_strcmp(tca[TCA_KIND], q->ops->id))))
-                                       goto create_n_graft;
+                               if (tca[TCA_KIND] &&
+                                   nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+                                       if (req_create_or_replace(n) ||
+                                           req_create_exclusive(n))
+                                               goto create_n_graft;
+                                       else if (req_change(n))
+                                               goto create_n_graft2;
+                               }
                        }
                }
        } else {
@@ -1698,6 +1724,7 @@ create_n_graft:
                NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
                return -ENOENT;
        }
+create_n_graft2:
        if (clid == TC_H_INGRESS) {
                if (dev_ingress_queue(dev)) {
                        q = qdisc_create(dev, dev_ingress_queue(dev),
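
Read alongside the helpers introduced above, the flag combinations map onto the tc utility's verbs. The sketch below (userspace; flag values come from <linux/netlink.h>, and the verb mapping reflects the behaviour described in the comment block, not a formal contract) reimplements the three predicates:

#include <stdio.h>
#include <linux/netlink.h>

/* "tc qdisc replace": create if absent, otherwise replace */
static int is_create_or_replace(unsigned int flags)
{
	return (flags & NLM_F_CREATE) && (flags & NLM_F_REPLACE);
}

/* "tc qdisc add": the object must not already exist */
static int is_create_exclusive(unsigned int flags)
{
	return (flags & NLM_F_CREATE) && (flags & NLM_F_EXCL);
}

/* "tc qdisc change": none of the three flags set */
static int is_change(unsigned int flags)
{
	return !(flags & (NLM_F_CREATE | NLM_F_REPLACE | NLM_F_EXCL));
}

int main(void)
{
	printf("add     -> exclusive create: %d\n",
	       is_create_exclusive(NLM_F_CREATE | NLM_F_EXCL));
	printf("replace -> create or replace: %d\n",
	       is_create_or_replace(NLM_F_CREATE | NLM_F_REPLACE));
	printf("change  -> change: %d\n", is_change(0));
	return 0;
}
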
index ab69ff7..793009f 100644 (file)
@@ -290,6 +290,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
                                                    "Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
                                return -EINVAL;
                        }
+
+                       if (nla_len(attr) != sizeof(u64)) {
+                               NL_SET_ERR_MSG_ATTR(extack, attr,
+                                                   "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
+                               return -EINVAL;
+                       }
+
                        if (i >= qopt->num_tc)
                                break;
                        priv->min_rate[i] = nla_get_u64(attr);
@@ -312,6 +319,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
                                                    "Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
                                return -EINVAL;
                        }
+
+                       if (nla_len(attr) != sizeof(u64)) {
+                               NL_SET_ERR_MSG_ATTR(extack, attr,
+                                                   "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
+                               return -EINVAL;
+                       }
+
                        if (i >= qopt->num_tc)
                                break;
                        priv->max_rate[i] = nla_get_u64(attr);
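
The new nla_len() checks guard against a userspace-controlled payload that is shorter than the 8 bytes nla_get_u64() unconditionally reads. A standalone sketch of the same guard, using only the struct nlattr framing from <linux/netlink.h> (read_rate64 is a hypothetical helper, not a kernel API):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <linux/netlink.h>	/* struct nlattr, NLA_HDRLEN */

/* Hypothetical helper: validate the payload length before reading it. */
static int read_rate64(const struct nlattr *attr, uint64_t *out)
{
	int payload_len = attr->nla_len - NLA_HDRLEN;

	if (payload_len != (int)sizeof(*out))
		return -1;	/* reject short (or oversized) payloads */

	memcpy(out, (const char *)attr + NLA_HDRLEN, sizeof(*out));
	return 0;
}

int main(void)
{
	union {
		struct nlattr hdr;
		unsigned char raw[NLA_HDRLEN + sizeof(uint64_t)];
	} msg = { 0 };
	uint64_t rate;

	msg.hdr.nla_len = sizeof(msg.raw);	/* well-formed 8-byte payload */
	printf("ok=%d\n", read_rate64(&msg.hdr, &rate) == 0);
	return 0;
}
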
index dfd9a99..befaf74 100644 (file)
@@ -381,8 +381,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
                           u32 lmax)
 {
        struct qfq_sched *q = qdisc_priv(sch);
-       struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
+       struct qfq_aggregate *new_agg;
 
+       /* 'lmax' can range within [QFQ_MIN_LMAX, pktlen + stab overhead] */
+       if (lmax > QFQ_MAX_LMAX)
+               return -EINVAL;
+
+       new_agg = qfq_find_agg(q, lmax, weight);
        if (new_agg == NULL) { /* create new aggregate */
                new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
                if (new_agg == NULL)
@@ -423,10 +428,17 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
        else
                weight = 1;
 
-       if (tb[TCA_QFQ_LMAX])
+       if (tb[TCA_QFQ_LMAX]) {
                lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
-       else
+       } else {
+               /* MTU size is user controlled */
                lmax = psched_mtu(qdisc_dev(sch));
+               if (lmax < QFQ_MIN_LMAX || lmax > QFQ_MAX_LMAX) {
+                       NL_SET_ERR_MSG_MOD(extack,
+                                          "MTU size out of bounds for qfq");
+                       return -EINVAL;
+               }
+       }
 
        inv_w = ONE_FP / weight;
        weight = ONE_FP / inv_w;
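
The inv_w/weight pair above rounds the requested weight to the nearest value exactly representable as a fixed-point inverse. A small round-trip sketch; the FRAC_BITS = 30 constant is an assumption taken from sch_qfq.c:

#include <stdio.h>

#define FRAC_BITS	30		/* assumed, as in sch_qfq.c */
#define ONE_FP		(1UL << FRAC_BITS)

int main(void)
{
	unsigned long weight = 65537;	/* requested weight */
	unsigned long inv_w = ONE_FP / weight;

	/* round to what the fixed-point inverse can actually express */
	weight = ONE_FP / inv_w;
	printf("effective weight = %lu\n", weight);	/* 65540, not 65537 */
	return 0;
}
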
index 717ae51..8c9cfff 100644 (file)
@@ -1015,6 +1015,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
                                                              TC_FP_PREEMPTIBLE),
 };
 
+static struct netlink_range_validation_signed taprio_cycle_time_range = {
+       .min = 0,
+       .max = INT_MAX,
+};
+
 static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
        [TCA_TAPRIO_ATTR_PRIOMAP]              = {
                .len = sizeof(struct tc_mqprio_qopt)
@@ -1023,7 +1028,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
        [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
        [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
        [TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
-       [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
+       [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           =
+               NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
        [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
        [TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
        [TCA_TAPRIO_ATTR_TXTIME_DELAY]               = { .type = NLA_U32 },
@@ -1159,6 +1165,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
                        return -EINVAL;
                }
 
+               if (cycle < 0 || cycle > INT_MAX) {
+                       NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+                       return -EINVAL;
+               }
+
                new->cycle_time = cycle;
        }
 
@@ -1347,7 +1358,7 @@ static void setup_txtime(struct taprio_sched *q,
                         struct sched_gate_list *sched, ktime_t base)
 {
        struct sched_entry *entry;
-       u32 interval = 0;
+       u64 interval = 0;
 
        list_for_each_entry(entry, &sched->entries, list) {
                entry->next_txtime = ktime_add_ns(base, interval);
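
The u32 -> u64 change above matters because next_txtime offsets accumulate nanoseconds across every entry of the schedule; once cycle times up to INT_MAX ns (about 2.1 s) are accepted, a u32 accumulator wraps after roughly 4.29 s. A standalone demonstration of the wrap:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t interval32 = 0;
	uint64_t interval64 = 0;
	uint64_t entry_ns = 2000000000ULL;	/* 2 s per gate entry */

	for (int i = 0; i < 3; i++) {
		interval32 += entry_ns;		/* silently truncates */
		interval64 += entry_ns;
	}
	printf("u32: %u (wrapped)\nu64: %llu\n",
	       (unsigned)interval32, (unsigned long long)interval64);
	return 0;
}
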
index 9388d98..76f1bce 100644 (file)
@@ -99,7 +99,7 @@ struct percpu_counter sctp_sockets_allocated;
 
 static void sctp_enter_memory_pressure(struct sock *sk)
 {
-       sctp_memory_pressure = 1;
+       WRITE_ONCE(sctp_memory_pressure, 1);
 }
 
 
@@ -9479,7 +9479,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
        newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
        newinet->inet_dport = htons(asoc->peer.port);
        newinet->pmtudisc = inet->pmtudisc;
-       newinet->inet_id = get_random_u16();
+       atomic_set(&newinet->inet_id, get_random_u16());
 
        newinet->uc_ttl = inet->uc_ttl;
        newinet->mc_loop = 1;
index a7f887d..f5834af 100644 (file)
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
        sk->sk_state = SMC_INIT;
        sk->sk_destruct = smc_destruct;
        sk->sk_protocol = protocol;
-       WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
-       WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+       WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+       WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
        smc = smc_sk(sk);
        INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
        INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,24 +436,9 @@ out:
        return rc;
 }
 
-static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
-                                  unsigned long mask)
-{
-       /* options we don't get control via setsockopt for */
-       nsk->sk_type = osk->sk_type;
-       nsk->sk_sndbuf = osk->sk_sndbuf;
-       nsk->sk_rcvbuf = osk->sk_rcvbuf;
-       nsk->sk_sndtimeo = osk->sk_sndtimeo;
-       nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
-       nsk->sk_mark = osk->sk_mark;
-       nsk->sk_priority = osk->sk_priority;
-       nsk->sk_rcvlowat = osk->sk_rcvlowat;
-       nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
-       nsk->sk_err = osk->sk_err;
-
-       nsk->sk_flags &= ~mask;
-       nsk->sk_flags |= osk->sk_flags & mask;
-}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
 
 #define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
                             (1UL << SOCK_KEEPOPEN) | \
@@ -470,9 +455,55 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
                             (1UL << SOCK_NOFCS) | \
                             (1UL << SOCK_FILTER_LOCKED) | \
                             (1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+                                    unsigned long mask)
+{
+       struct net *nnet = sock_net(nsk);
+
+       nsk->sk_userlocks = osk->sk_userlocks;
+       if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+               nsk->sk_sndbuf = osk->sk_sndbuf;
+       } else {
+               if (mask == SK_FLAGS_SMC_TO_CLC)
+                       WRITE_ONCE(nsk->sk_sndbuf,
+                                  READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+               else
+                       WRITE_ONCE(nsk->sk_sndbuf,
+                                  2 * READ_ONCE(nnet->smc.sysctl_wmem));
+       }
+       if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+               nsk->sk_rcvbuf = osk->sk_rcvbuf;
+       } else {
+               if (mask == SK_FLAGS_SMC_TO_CLC)
+                       WRITE_ONCE(nsk->sk_rcvbuf,
+                                  READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+               else
+                       WRITE_ONCE(nsk->sk_rcvbuf,
+                                  2 * READ_ONCE(nnet->smc.sysctl_rmem));
+       }
+}
+
+static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+                                  unsigned long mask)
+{
+       /* options we don't get control via setsockopt for */
+       nsk->sk_type = osk->sk_type;
+       nsk->sk_sndtimeo = osk->sk_sndtimeo;
+       nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+       nsk->sk_mark = READ_ONCE(osk->sk_mark);
+       nsk->sk_priority = osk->sk_priority;
+       nsk->sk_rcvlowat = osk->sk_rcvlowat;
+       nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
+       nsk->sk_err = osk->sk_err;
+
+       nsk->sk_flags &= ~mask;
+       nsk->sk_flags |= osk->sk_flags & mask;
+
+       smc_adjust_sock_bufsizes(nsk, osk, mask);
+}
+
 static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
 {
        smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
                sock_hold(lsk); /* sock_put in smc_listen_work */
                INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
                smc_copy_sock_settings_to_smc(new_smc);
-               new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
-               new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
                sock_hold(&new_smc->sk); /* sock_put in passive closing */
                if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
                        sock_put(&new_smc->sk);
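
The 2x/0.5x factors in this file implement the common sk_sndbuf/sk_rcvbuf convention that only half of the socket buffer size is payload, the other half being accounted as overhead. A trivial sketch of the round-trip (64 KiB matches the net_smc_wmem_init default added in the sysctl hunk further down):

#include <stdio.h>

int main(void)
{
	int sysctl_wmem = 64 * 1024;		/* net.smc.wmem default */
	int sk_sndbuf = 2 * sysctl_wmem;	/* as in smc_sock_alloc() */
	int payload = sk_sndbuf / 2;		/* as in __smc_buf_create() */

	printf("sk_sndbuf=%d usable payload=%d\n", sk_sndbuf, payload);
	return 0;
}
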
index 2eeea4c..1f2b912 100644 (file)
@@ -161,7 +161,7 @@ struct smc_connection {
 
        struct smc_buf_desc     *sndbuf_desc;   /* send buffer descriptor */
        struct smc_buf_desc     *rmb_desc;      /* RMBE descriptor */
-       int                     rmbe_size_short;/* compressed notation */
+       int                     rmbe_size_comp; /* compressed notation */
        int                     rmbe_update_limit;
                                                /* lower limit for consumer
                                                 * cursor update
index b9b8b07..c90d9e5 100644 (file)
@@ -1007,7 +1007,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
                clc->d0.gid =
                        conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
                clc->d0.token = conn->rmb_desc->token;
-               clc->d0.dmbe_size = conn->rmbe_size_short;
+               clc->d0.dmbe_size = conn->rmbe_size_comp;
                clc->d0.dmbe_idx = 0;
                memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
                if (version == SMC_V1) {
@@ -1050,7 +1050,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
                        clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
                        break;
                }
-               clc->r0.rmbe_size = conn->rmbe_size_short;
+               clc->r0.rmbe_size = conn->rmbe_size_comp;
                clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
                        cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
                        cpu_to_be64((u64)sg_dma_address
index 3f465fa..6b78075 100644 (file)
@@ -2309,31 +2309,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
        struct smc_connection *conn = &smc->conn;
        struct smc_link_group *lgr = conn->lgr;
        struct list_head *buf_list;
-       int bufsize, bufsize_short;
+       int bufsize, bufsize_comp;
        struct rw_semaphore *lock;      /* lock buffer list */
        bool is_dgraded = false;
-       int sk_buf_size;
 
        if (is_rmb)
                /* use socket recv buffer size (w/o overhead) as start value */
-               sk_buf_size = smc->sk.sk_rcvbuf;
+               bufsize = smc->sk.sk_rcvbuf / 2;
        else
                /* use socket send buffer size (w/o overhead) as start value */
-               sk_buf_size = smc->sk.sk_sndbuf;
+               bufsize = smc->sk.sk_sndbuf / 2;
 
-       for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
-            bufsize_short >= 0; bufsize_short--) {
+       for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+            bufsize_comp >= 0; bufsize_comp--) {
                if (is_rmb) {
                        lock = &lgr->rmbs_lock;
-                       buf_list = &lgr->rmbs[bufsize_short];
+                       buf_list = &lgr->rmbs[bufsize_comp];
                } else {
                        lock = &lgr->sndbufs_lock;
-                       buf_list = &lgr->sndbufs[bufsize_short];
+                       buf_list = &lgr->sndbufs[bufsize_comp];
                }
-               bufsize = smc_uncompress_bufsize(bufsize_short);
+               bufsize = smc_uncompress_bufsize(bufsize_comp);
 
                /* check for reusable slot in the link group */
-               buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+               buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
                if (buf_desc) {
                        buf_desc->is_dma_need_sync = 0;
                        SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2376,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
        if (is_rmb) {
                conn->rmb_desc = buf_desc;
-               conn->rmbe_size_short = bufsize_short;
-               smc->sk.sk_rcvbuf = bufsize;
+               conn->rmbe_size_comp = bufsize_comp;
+               smc->sk.sk_rcvbuf = bufsize * 2;
                atomic_set(&conn->bytes_to_rcv, 0);
                conn->rmbe_update_limit =
                        smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2385,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
                        smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
        } else {
                conn->sndbuf_desc = buf_desc;
-               smc->sk.sk_sndbuf = bufsize;
+               smc->sk.sk_sndbuf = bufsize * 2;
                atomic_set(&conn->sndbuf_space, bufsize);
        }
        return 0;
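
The "compressed notation" carried in rmbe_size_comp encodes the RMB size as a small index exchanged in the CLC handshake. A hedged sketch of the idea, assuming the 16 KiB SMC_BUF_MIN_SIZE and a plain power-of-two encoding; the real smc_compress_bufsize()/smc_uncompress_bufsize() in smc_core.c handle rounding and SMC-D limits more carefully:

#include <stdio.h>

#define SMC_BUF_MIN_SIZE	(16 * 1024)	/* 16 KiB */

/* index -> size: 0 -> 16K, 1 -> 32K, 2 -> 64K, ... (assumed encoding) */
static int uncompress_bufsize(int comp)
{
	return SMC_BUF_MIN_SIZE << comp;
}

/* size -> largest index that still fits into 'size' (assumed encoding) */
static int compress_bufsize(int size)
{
	int comp = 0;

	while (uncompress_bufsize(comp + 1) <= size)
		comp++;
	return comp;
}

int main(void)
{
	int size = 64 * 1024;
	int comp = compress_bufsize(size);

	printf("size=%d comp=%d back=%d\n",
	       size, comp, uncompress_bufsize(comp));
	return 0;
}
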
index b6f79fa..0b2a957 100644 (file)
 
 static int min_sndbuf = SMC_BUF_MIN_SIZE;
 static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
 
 static struct ctl_table smc_table[] = {
        {
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_sndbuf,
+               .extra2         = &max_sndbuf,
        },
        {
                .procname       = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &min_rcvbuf,
+               .extra2         = &max_rcvbuf,
        },
        {  }
 };
@@ -88,8 +94,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
        net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
        net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
        net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
-       WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
-       WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+       WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+       WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
 
        return 0;
 
index 0b6034f..f420d84 100644 (file)
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
                return NULL;
        inode->i_ino = get_next_ino();
        inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        switch (mode & S_IFMT) {
        case S_IFDIR:
                inode->i_fop = &simple_dir_operations;
index e43f263..2eb8df4 100644 (file)
@@ -1244,6 +1244,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
        if (ret != head->iov_len)
                goto out;
 
+       if (xdr_buf_pagecount(xdr))
+               xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
+
        msg.msg_flags = MSG_SPLICE_PAGES;
        iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
                      xdr_buf_pagecount(xdr), xdr->page_len);
index b098fde..28c0771 100644 (file)
@@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
        if (!rep->rr_rdmabuf)
                goto out_free;
 
-       if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
-               goto out_free_regbuf;
-
        rep->rr_cid.ci_completion_id =
                atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
 
@@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
        spin_unlock(&buf->rb_lock);
        return rep;
 
-out_free_regbuf:
-       rpcrdma_regbuf_free(rep->rr_rdmabuf);
 out_free:
        kfree(rep);
 out:
@@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
                        rep = rpcrdma_rep_create(r_xprt, temp);
                if (!rep)
                        break;
+               if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+                       rpcrdma_rep_put(buf, rep);
+                       break;
+               }
 
                rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
                trace_xprtrdma_post_recv(rep);
index 577fa5a..302fd74 100644 (file)
@@ -1960,7 +1960,8 @@ rcv:
 
        skb_reset_network_header(*skb);
        skb_pull(*skb, tipc_ehdr_size(ehdr));
-       pskb_trim(*skb, (*skb)->len - aead->authsize);
+       if (pskb_trim(*skb, (*skb)->len - aead->authsize))
+               goto free_skb;
 
        /* Validate TIPCv2 message */
        if (unlikely(!tipc_msg_validate(skb))) {
index 5e000fd..a9c5b65 100644 (file)
@@ -583,7 +583,7 @@ update:
                                 n->capabilities, &n->bc_entry.inputq1,
                                 &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
                pr_warn("Broadcast rcv link creation failed, no memory\n");
-               kfree(n);
+               tipc_node_put(n);
                n = NULL;
                goto exit;
        }
index 2021fe5..529101e 100644 (file)
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
 static LIST_HEAD(tls_device_down_list);
 static DEFINE_SPINLOCK(tls_device_lock);
 
+static struct page *dummy_page;
+
 static void tls_device_free_ctx(struct tls_context *ctx)
 {
        if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
        return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
 }
 
-static int tls_device_record_close(struct sock *sk,
-                                  struct tls_context *ctx,
-                                  struct tls_record_info *record,
-                                  struct page_frag *pfrag,
-                                  unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+                                   struct tls_context *ctx,
+                                   struct tls_record_info *record,
+                                   struct page_frag *pfrag,
+                                   unsigned char record_type)
 {
        struct tls_prot_info *prot = &ctx->prot_info;
-       int ret;
+       struct page_frag dummy_tag_frag;
 
        /* append tag
         * device will fill in the tag, we just need to append a placeholder
         * use socket memory to improve coalescing (re-using a single buffer
         * increases frag count)
-        * if we can't allocate memory now, steal some back from data
+        * if we can't allocate memory now, use the dummy page
         */
-       if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
-                                       sk->sk_allocation))) {
-               ret = 0;
-               tls_append_frag(record, pfrag, prot->tag_size);
-       } else {
-               ret = prot->tag_size;
-               if (record->len <= prot->overhead_size)
-                       return -ENOMEM;
+       if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+           !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+               dummy_tag_frag.page = dummy_page;
+               dummy_tag_frag.offset = 0;
+               pfrag = &dummy_tag_frag;
        }
+       tls_append_frag(record, pfrag, prot->tag_size);
 
        /* fill prepend */
        tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
                         record->len - prot->overhead_size,
                         record_type);
-       return ret;
 }
 
 static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -541,18 +540,8 @@ last_record:
 
                if (done || record->len >= max_open_record_len ||
                    (record->num_frags >= MAX_SKB_FRAGS - 1)) {
-                       rc = tls_device_record_close(sk, tls_ctx, record,
-                                                    pfrag, record_type);
-                       if (rc) {
-                               if (rc > 0) {
-                                       size += rc;
-                               } else {
-                                       size = orig_size;
-                                       destroy_record(record);
-                                       ctx->open_record = NULL;
-                                       break;
-                               }
-                       }
+                       tls_device_record_close(sk, tls_ctx, record,
+                                               pfrag, record_type);
 
                        rc = tls_push_record(sk,
                                             tls_ctx,
@@ -1450,14 +1439,26 @@ int __init tls_device_init(void)
 {
        int err;
 
-       destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
-       if (!destruct_wq)
+       dummy_page = alloc_page(GFP_KERNEL);
+       if (!dummy_page)
                return -ENOMEM;
 
+       destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+       if (!destruct_wq) {
+               err = -ENOMEM;
+               goto err_free_dummy;
+       }
+
        err = register_netdevice_notifier(&tls_dev_notifier);
        if (err)
-               destroy_workqueue(destruct_wq);
+               goto err_destroy_wq;
 
+       return 0;
+
+err_destroy_wq:
+       destroy_workqueue(destruct_wq);
+err_free_dummy:
+       put_page(dummy_page);
        return err;
 }
 
@@ -1466,4 +1467,5 @@ void __exit tls_device_cleanup(void)
        unregister_netdevice_notifier(&tls_dev_notifier);
        destroy_workqueue(destruct_wq);
        clean_acked_data_flush();
+       put_page(dummy_page);
 }
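
Structurally, the dummy_page change is the classic preallocated-fallback pattern: reserve the resource once at init time so the data path can always close a record, even when an on-demand allocation fails. A generic userspace sketch of the pattern (all names hypothetical):

#include <stdio.h>
#include <stdlib.h>

static void *fallback_buf;	/* reserved once at init */

/* Hypothetical: return scratch space, falling back to the reserved buffer */
static void *get_tag_space(size_t len)
{
	void *p = malloc(len);

	return p ? p : fallback_buf;	/* cannot fail once init succeeded */
}

int main(void)
{
	fallback_buf = malloc(4096);
	if (!fallback_buf)
		return 1;	/* init is the only place allowed to fail */
	printf("tag space at %p\n", get_tag_space(16));
	return 0;
}
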
index b689612..4a8ee2f 100644 (file)
@@ -139,9 +139,6 @@ int tls_push_sg(struct sock *sk,
 
        ctx->splicing_pages = true;
        while (1) {
-               if (sg_is_last(sg))
-                       msg.msg_flags = flags;
-
                /* is sending application-limited? */
                tcp_rate_check_app_limited(sk);
                p = sg_page(sg);
index 123b35d..86930a8 100644 (file)
@@ -289,17 +289,29 @@ static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
        return 0;
 }
 
-static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
+static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
 {
+       struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
+       short offset = offsetof(struct sockaddr_storage, __data);
+
+       BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
+
        /* This may look like an off by one error but it is a bit more
         * subtle.  108 is the longest valid AF_UNIX path for a binding.
         * sun_path[108] doesn't as such exist.  However in kernel space
         * we are guaranteed that it is a valid memory location in our
         * kernel address buffer because syscall functions always pass
         * a pointer of struct sockaddr_storage which has a bigger buffer
-        * than 108.
+        * than 108.  Also, we must terminate sun_path for strlen() in
+        * getname_kernel().
+        */
+       addr->__data[addr_len - offset] = 0;
+
+       /* Don't pass sunaddr->sun_path to strlen().  Otherwise, a length of
+        * 108 will cause a panic if CONFIG_FORTIFY_SOURCE=y.  Let
+        * __fortify_strlen() know the actual buffer size.
         */
-       ((char *)sunaddr)[addr_len] = 0;
+       return strlen(addr->__data) + offset + 1;
 }
 
 static void __unix_remove_socket(struct sock *sk)
@@ -778,7 +790,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
        if (mutex_lock_interruptible(&u->iolock))
                return -EINTR;
 
-       sk->sk_peek_off = val;
+       WRITE_ONCE(sk->sk_peek_off, val);
        mutex_unlock(&u->iolock);
 
        return 0;
@@ -1208,10 +1220,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
        struct path parent;
        int err;
 
-       unix_mkname_bsd(sunaddr, addr_len);
-       addr_len = strlen(sunaddr->sun_path) +
-               offsetof(struct sockaddr_un, sun_path) + 1;
-
+       addr_len = unix_mkname_bsd(sunaddr, addr_len);
        addr = unix_create_addr(sunaddr, addr_len);
        if (!addr)
                return -ENOMEM;
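
For contrast with the kernel-side termination above, this is the conventional userspace way to compute the address length that unix_mkname_bsd() now reconstructs; a minimal sketch with error handling elided:

#include <stdio.h>
#include <stddef.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>

int main(void)
{
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	socklen_t addr_len;

	strncpy(sun.sun_path, "/tmp/example.sock", sizeof(sun.sun_path) - 1);

	/* same shape as the kernel's strlen() + offset + 1 computation */
	addr_len = offsetof(struct sockaddr_un, sun_path)
		   + strlen(sun.sun_path) + 1;
	printf("addr_len=%u\n", (unsigned)addr_len);
	return 0;
}
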
index 0da2e6a..8bcf8e2 100644 (file)
@@ -5430,8 +5430,11 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
        if (!wiphy->mbssid_max_interfaces)
                return ERR_PTR(-EINVAL);
 
-       nla_for_each_nested(nl_elems, attrs, rem_elems)
+       nla_for_each_nested(nl_elems, attrs, rem_elems) {
+               if (num_elems >= 255)
+                       return ERR_PTR(-EINVAL);
                num_elems++;
+       }
 
        elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
        if (!elems)
index 8bf00ca..0cf1ce7 100644 (file)
@@ -657,7 +657,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
 
        ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
        if (ret)
-               return ret;
+               return 0;
 
        for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
                            ies->data, ies->len) {
index 89c9ad6..1783ab9 100644 (file)
@@ -580,6 +580,8 @@ int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb)
                hdrlen += ETH_ALEN + 2;
        else if (!pskb_may_pull(skb, hdrlen))
                return -EINVAL;
+       else
+               payload.eth.h_proto = htons(skb->len - hdrlen);
 
        mesh_addr = skb->data + sizeof(payload.eth) + ETH_ALEN;
        switch (payload.flags & MESH_FLAGS_AE) {
index 31dca4e..10ea85c 100644 (file)
@@ -505,7 +505,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 
        skb->dev = dev;
        skb->priority = xs->sk.sk_priority;
-       skb->mark = xs->sk.sk_mark;
+       skb->mark = READ_ONCE(xs->sk.sk_mark);
        skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
        skb->destructor = xsk_destruct_skb;
 
@@ -994,6 +994,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
                                err = xp_alloc_tx_descs(xs->pool, xs);
                                if (err) {
                                        xp_put_pool(xs->pool);
+                                       xs->pool = NULL;
                                        sockfd_put(sock);
                                        goto out_unlock;
                                }
index 8cbf45a..655fe4f 100644 (file)
@@ -108,7 +108,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
        [XFRMA_ALG_COMP]        = { .len = sizeof(struct xfrm_algo) },
        [XFRMA_ENCAP]           = { .len = sizeof(struct xfrm_encap_tmpl) },
        [XFRMA_TMPL]            = { .len = sizeof(struct xfrm_user_tmpl) },
-       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_sec_ctx) },
+       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_user_sec_ctx) },
        [XFRMA_LTIME_VAL]       = { .len = sizeof(struct xfrm_lifetime_cur) },
        [XFRMA_REPLAY_VAL]      = { .len = sizeof(struct xfrm_replay_state) },
        [XFRMA_REPLAY_THRESH]   = { .type = NLA_U32 },
index 815b380..d5ee967 100644 (file)
@@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
        int optlen = 0;
        int err = -EINVAL;
 
+       skb->protocol = htons(ETH_P_IP);
+
        if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
                struct ip_beet_phdr *ph;
                int phlen;
@@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err = -EINVAL;
 
+       skb->protocol = htons(ETH_P_IP);
+
        if (!pskb_may_pull(skb, sizeof(struct iphdr)))
                goto out;
 
@@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
 {
        int err = -EINVAL;
 
+       skb->protocol = htons(ETH_P_IPV6);
+
        if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
                goto out;
 
@@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
        int size = sizeof(struct ipv6hdr);
        int err;
 
+       skb->protocol = htons(ETH_P_IPV6);
+
        err = skb_cow_head(skb, size + skb->mac_len);
        if (err)
                goto out;
@@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
                        return xfrm6_remove_tunnel_encap(x, skb);
                break;
                }
+               return -EINVAL;
        }
 
        WARN_ON_ONCE(1);
@@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
                return -EAFNOSUPPORT;
        }
 
-       switch (XFRM_MODE_SKB_CB(skb)->protocol) {
-       case IPPROTO_IPIP:
-       case IPPROTO_BEETPH:
-               skb->protocol = htons(ETH_P_IP);
-               break;
-       case IPPROTO_IPV6:
-               skb->protocol = htons(ETH_P_IPV6);
-               break;
-       default:
-               WARN_ON_ONCE(1);
-               break;
-       }
-
        return xfrm_inner_mode_encap_remove(x, skb);
 }
 
index a331996..b864740 100644 (file)
@@ -537,8 +537,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
 
        switch (skb->protocol) {
        case htons(ETH_P_IPV6):
-               xfrm_decode_session(skb, &fl, AF_INET6);
                memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET6);
                if (!dst) {
                        fl.u.ip6.flowi6_oif = dev->ifindex;
                        fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
@@ -552,8 +552,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
                }
                break;
        case htons(ETH_P_IP):
-               xfrm_decode_session(skb, &fl, AF_INET);
                memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+               xfrm_decode_session(skb, &fl, AF_INET);
                if (!dst) {
                        struct rtable *rt;
 
index e7617c9..d6b4057 100644 (file)
@@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
 
                match = xfrm_selector_match(&pol->selector, fl, family);
                if (match) {
-                       if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+                       if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
                            pol->if_id != if_id) {
                                pol = NULL;
                                goto out;
index 49e63ee..bda5327 100644 (file)
@@ -1324,12 +1324,8 @@ found:
                        struct xfrm_dev_offload *xso = &x->xso;
 
                        if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
-                               xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
-                               xso->dir = 0;
-                               netdev_put(xso->dev, &xso->dev_tracker);
-                               xso->dev = NULL;
-                               xso->real_dev = NULL;
-                               xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+                               xfrm_dev_state_delete(x);
+                               xfrm_dev_state_free(x);
                        }
 #endif
                        x->km.state = XFRM_STATE_DEAD;
index c34a2a0..ad01997 100644 (file)
@@ -628,7 +628,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
        struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
        struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH];
 
-       if (re) {
+       if (re && x->replay_esn && x->preplay_esn) {
                struct xfrm_replay_state_esn *replay_esn;
                replay_esn = nla_data(re);
                memcpy(x->replay_esn, replay_esn,
@@ -1267,6 +1267,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
                                         sizeof(*filter), GFP_KERNEL);
                        if (filter == NULL)
                                return -ENOMEM;
+
+                       /* see addr_match(), (prefix length >> 5) << 2
+                        * will be used to compare xfrm_address_t
+                        */
+                       if (filter->splen > (sizeof(xfrm_address_t) << 3) ||
+                           filter->dplen > (sizeof(xfrm_address_t) << 3)) {
+                               kfree(filter);
+                               return -EINVAL;
+                       }
                }
 
                if (attrs[XFRMA_PROTO])
@@ -2336,6 +2345,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
                                            NETLINK_CB(skb).portid);
                }
        } else {
+               xfrm_dev_policy_delete(xp);
                xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
 
                if (err != 0)
@@ -3015,7 +3025,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_ALG_COMP]        = { .len = sizeof(struct xfrm_algo) },
        [XFRMA_ENCAP]           = { .len = sizeof(struct xfrm_encap_tmpl) },
        [XFRMA_TMPL]            = { .len = sizeof(struct xfrm_user_tmpl) },
-       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_sec_ctx) },
+       [XFRMA_SEC_CTX]         = { .len = sizeof(struct xfrm_user_sec_ctx) },
        [XFRMA_LTIME_VAL]       = { .len = sizeof(struct xfrm_lifetime_cur) },
        [XFRMA_REPLAY_VAL]      = { .len = sizeof(struct xfrm_replay_state) },
        [XFRMA_REPLAY_THRESH]   = { .type = NLA_U32 },
@@ -3035,6 +3045,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
        [XFRMA_SET_MARK]        = { .type = NLA_U32 },
        [XFRMA_SET_MARK_MASK]   = { .type = NLA_U32 },
        [XFRMA_IF_ID]           = { .type = NLA_U32 },
+       [XFRMA_MTIMER_THRESH]   = { .type = NLA_U32 },
 };
 EXPORT_SYMBOL_GPL(xfrma_policy);
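
The new splen/dplen bound exists because addr_match() indexes an xfrm_address_t (128 bits) in 32-bit words derived from the prefix length. A simplified sketch of that comparison; the real helper in include/net/xfrm.h masks the trailing partial word in network byte order, which is glossed over here:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Simplified addr_match(): compare the leading 'prefixlen' bits of two
 * 128-bit addresses stored as four 32-bit words. */
static int addr_match(const uint32_t *a, const uint32_t *b, int prefixlen)
{
	int words = prefixlen >> 5;	/* whole 32-bit words to compare */
	int rem = prefixlen & 0x1f;	/* leftover bits in the next word */

	if (prefixlen < 0 || prefixlen > 128)	/* the bound being enforced */
		return 0;
	if (memcmp(a, b, (size_t)words * 4))
		return 0;
	if (rem) {
		uint32_t mask = ~0u << (32 - rem);

		if ((a[words] ^ b[words]) & mask)
			return 0;
	}
	return 1;
}

int main(void)
{
	uint32_t a[4] = { 0xdeadbeef, 0, 0, 0 };
	uint32_t b[4] = { 0xdeadbe00, 0, 0, 0 };

	printf("24-bit prefix match: %d\n", addr_match(a, b, 24));
	return 0;
}
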
 
index 7c9d9f1..4124bfa 100644 (file)
@@ -257,7 +257,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \
        -fno-partial-inlining -fplugin-arg-arm_ssp_per_task_plugin-% \
        -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \
        -fzero-call-used-regs=% -fno-stack-clash-protection \
-       -fno-inline-functions-called-once \
+       -fno-inline-functions-called-once -fsanitize=bounds-strict \
        --param=% --param asan-%
 
 # Derived from `scripts/Makefile.clang`.
index 3e601ce..0589549 100644 (file)
@@ -13,5 +13,6 @@
 #include <linux/sched.h>
 
 /* `bindgen` gets confused at certain things. */
+const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
 const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
 const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
index 397a3dd..9363b52 100644 (file)
@@ -9,6 +9,36 @@ use crate::bindings;
 
 struct KernelAllocator;
 
+/// Calls `krealloc` with a proper size to allocate a new object aligned to `new_layout`'s alignment.
+///
+/// # Safety
+///
+/// - `ptr` can be either null or a pointer which has been allocated by this allocator.
+/// - `new_layout` must have a non-zero size.
+unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 {
+    // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
+    let layout = new_layout.pad_to_align();
+
+    let mut size = layout.size();
+
+    if layout.align() > bindings::BINDINGS_ARCH_SLAB_MINALIGN {
+        // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
+        // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
+        // more information).
+        //
+        // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
+        // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
+        size = size.next_power_of_two();
+    }
+
+    // SAFETY:
+    // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
+    //   function safety requirement.
+    // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
+    //    according to the function safety requirement) or a result from `next_power_of_two()`.
+    unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
+}
+
 unsafe impl GlobalAlloc for KernelAllocator {
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
         // `krealloc()` is used instead of `kmalloc()` because the latter is
@@ -30,10 +60,20 @@ static ALLOCATOR: KernelAllocator = KernelAllocator;
 // to extract the object file that has them from the archive. For the moment,
 // let's generate them ourselves instead.
 //
+// Note: Although these are *safe* functions, they are called by the compiler
+// with parameters that obey the same `GlobalAlloc` function safety
+// requirements: size and align should form a valid layout, and size is
+// greater than 0.
+//
 // Note that `#[no_mangle]` implies exported too, nowadays.
 #[no_mangle]
-fn __rust_alloc(size: usize, _align: usize) -> *mut u8 {
-    unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 }
+fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
+    // SAFETY: See assumption above.
+    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+
+    // SAFETY: `ptr::null_mut()` is null and, per the assumption above, the size of `layout`
+    // is greater than 0.
+    unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
 }
 
 #[no_mangle]
@@ -42,23 +82,27 @@ fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) {
 }
 
 #[no_mangle]
-fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 {
-    unsafe {
-        bindings::krealloc(
-            ptr as *const core::ffi::c_void,
-            new_size,
-            bindings::GFP_KERNEL,
-        ) as *mut u8
-    }
+fn __rust_realloc(ptr: *mut u8, _old_size: usize, align: usize, new_size: usize) -> *mut u8 {
+    // SAFETY: See assumption above.
+    let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) };
+
+    // SAFETY: Per assumption above, `ptr` is allocated by `__rust_*` before, and the size of
+    // `new_layout` is greater than 0.
+    unsafe { krealloc_aligned(ptr, new_layout, bindings::GFP_KERNEL) }
 }
 
 #[no_mangle]
-fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 {
+fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8 {
+    // SAFETY: See assumption above.
+    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+
+    // SAFETY: `ptr::null_mut()` is null and, per the assumption above, the size of `layout`
+    // is greater than 0.
     unsafe {
-        bindings::krealloc(
-            core::ptr::null(),
-            size,
+        krealloc_aligned(
+            ptr::null_mut(),
+            layout,
             bindings::GFP_KERNEL | bindings::__GFP_ZERO,
-        ) as *mut u8
+        )
     }
 }
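
The sizing logic of krealloc_aligned() can be spelled out independently of Rust. The plain-C sketch below (the ARCH_SLAB_MINALIGN value is hypothetical and arch-dependent) shows the two steps: pad the size to a multiple of the alignment, then bump it to the next power of two when the alignment exceeds the slab minimum, so kmalloc's power-of-two size/alignment guarantee applies:

#include <stdio.h>
#include <stddef.h>

#define ARCH_SLAB_MINALIGN	8	/* hypothetical; arch-dependent */

static size_t next_power_of_two(size_t n)
{
	size_t p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

/* Mirrors the size computation in krealloc_aligned() above. */
static size_t krealloc_aligned_size(size_t size, size_t align)
{
	/* Layout::pad_to_align(): round size up to a multiple of align */
	size = (size + align - 1) & ~(align - 1);

	if (align > ARCH_SLAB_MINALIGN)
		size = next_power_of_two(size);
	return size;
}

int main(void)
{
	/* e.g. a 96-byte object that must be 128-byte aligned */
	printf("%zu\n", krealloc_aligned_size(96, 128));	/* 128 */
	return 0;
}
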
index a89843c..172f563 100644 (file)
@@ -243,8 +243,7 @@ impl<T: 'static> ForeignOwnable for Arc<T> {
         let inner = NonNull::new(ptr as *mut ArcInner<T>).unwrap();
 
         // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive
-        // for the lifetime of the returned value. Additionally, the safety requirements of
-        // `ForeignOwnable::borrow_mut` ensure that no new mutable references are created.
+        // for the lifetime of the returned value.
         unsafe { ArcBorrow::new(inner) }
     }
 
index 1e5380b..d479f8d 100644 (file)
@@ -35,34 +35,16 @@ pub trait ForeignOwnable: Sized {
     ///
     /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
     /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
-    /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow_mut`]
-    /// for this object must have been dropped.
     unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>;
 
-    /// Mutably borrows a foreign-owned object.
-    ///
-    /// # Safety
-    ///
-    /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
-    /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
-    /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
-    /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
-    unsafe fn borrow_mut(ptr: *const core::ffi::c_void) -> ScopeGuard<Self, fn(Self)> {
-        // SAFETY: The safety requirements ensure that `ptr` came from a previous call to
-        // `into_foreign`.
-        ScopeGuard::new_with_data(unsafe { Self::from_foreign(ptr) }, |d| {
-            d.into_foreign();
-        })
-    }
-
     /// Converts a foreign-owned object back to a Rust-owned one.
     ///
     /// # Safety
     ///
     /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
     /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
-    /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
-    /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
+    /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] for
+    /// this object must have been dropped.
     unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self;
 }
 
index 34d5e7f..ee06044 100644 (file)
@@ -74,6 +74,7 @@ pub(crate) fn vtable(_attr: TokenStream, ts: TokenStream) -> TokenStream {
                 const {gen_const_name}: bool = false;",
             )
             .unwrap();
+            consts.insert(gen_const_name);
         }
     } else {
         const_items = "const USE_VTABLE_ATTR: () = ();".to_owned();
index 06d8891..e2a6a69 100644 (file)
@@ -2,7 +2,9 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/ftrace.h>
+#ifndef CONFIG_ARM64
 #include <asm/asm-offsets.h>
+#endif
 
 extern void my_direct_func1(void);
 extern void my_direct_func2(void);
@@ -96,6 +98,38 @@ asm (
 
 #endif /* CONFIG_S390 */
 
+#ifdef CONFIG_ARM64
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp1, @function\n"
+"      .globl          my_tramp1\n"
+"   my_tramp1:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #16\n"
+"      stp     x9, x30, [sp]\n"
+"      bl      my_direct_func1\n"
+"      ldp     x30, x9, [sp]\n"
+"      add     sp, sp, #16\n"
+"      ret     x9\n"
+"      .size           my_tramp1, .-my_tramp1\n"
+
+"      .type           my_tramp2, @function\n"
+"      .globl          my_tramp2\n"
+"   my_tramp2:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #16\n"
+"      stp     x9, x30, [sp]\n"
+"      bl      my_direct_func2\n"
+"      ldp     x30, x9, [sp]\n"
+"      add     sp, sp, #16\n"
+"      ret     x9\n"
+"      .size           my_tramp2, .-my_tramp2\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
 #ifdef CONFIG_LOONGARCH
 
 asm (
index 62f6b68..2e34983 100644 (file)
@@ -2,7 +2,9 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/ftrace.h>
+#ifndef CONFIG_ARM64
 #include <asm/asm-offsets.h>
+#endif
 
 extern void my_direct_func1(unsigned long ip);
 extern void my_direct_func2(unsigned long ip);
@@ -103,6 +105,44 @@ asm (
 
 #endif /* CONFIG_S390 */
 
+#ifdef CONFIG_ARM64
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp1, @function\n"
+"      .globl          my_tramp1\n"
+"   my_tramp1:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #32\n"
+"      stp     x9, x30, [sp]\n"
+"      str     x0, [sp, #16]\n"
+"      mov     x0, x30\n"
+"      bl      my_direct_func1\n"
+"      ldp     x30, x9, [sp]\n"
+"      ldr     x0, [sp, #16]\n"
+"      add     sp, sp, #32\n"
+"      ret     x9\n"
+"      .size           my_tramp1, .-my_tramp1\n"
+
+"      .type           my_tramp2, @function\n"
+"      .globl          my_tramp2\n"
+"   my_tramp2:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #32\n"
+"      stp     x9, x30, [sp]\n"
+"      str     x0, [sp, #16]\n"
+"      mov     x0, x30\n"
+"      bl      my_direct_func2\n"
+"      ldp     x30, x9, [sp]\n"
+"      ldr     x0, [sp, #16]\n"
+"      add     sp, sp, #32\n"
+"      ret     x9\n"
+"      .size           my_tramp2, .-my_tramp2\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
 #ifdef CONFIG_LOONGARCH
 #include <asm/asm.h>
 
index 5482cf6..9243dbf 100644 (file)
@@ -4,7 +4,9 @@
 #include <linux/mm.h> /* for handle_mm_fault() */
 #include <linux/ftrace.h>
 #include <linux/sched/stat.h>
+#ifndef CONFIG_ARM64
 #include <asm/asm-offsets.h>
+#endif
 
 extern void my_direct_func(unsigned long ip);
 
@@ -66,6 +68,29 @@ asm (
 
 #endif /* CONFIG_S390 */
 
+#ifdef CONFIG_ARM64
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp, @function\n"
+"      .globl          my_tramp\n"
+"   my_tramp:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #32\n"
+"      stp     x9, x30, [sp]\n"
+"      str     x0, [sp, #16]\n"
+"      mov     x0, x30\n"
+"      bl      my_direct_func\n"
+"      ldp     x30, x9, [sp]\n"
+"      ldr     x0, [sp, #16]\n"
+"      add     sp, sp, #32\n"
+"      ret     x9\n"
+"      .size           my_tramp, .-my_tramp\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
 #ifdef CONFIG_LOONGARCH
 
 #include <asm/asm.h>
index a05bc2c..e39c356 100644 (file)
@@ -3,16 +3,18 @@
 
 #include <linux/mm.h> /* for handle_mm_fault() */
 #include <linux/ftrace.h>
+#ifndef CONFIG_ARM64
 #include <asm/asm-offsets.h>
+#endif
 
-extern void my_direct_func(struct vm_area_struct *vma,
-                          unsigned long address, unsigned int flags);
+extern void my_direct_func(struct vm_area_struct *vma, unsigned long address,
+                          unsigned int flags, struct pt_regs *regs);
 
-void my_direct_func(struct vm_area_struct *vma,
-                       unsigned long address, unsigned int flags)
+void my_direct_func(struct vm_area_struct *vma, unsigned long address,
+                   unsigned int flags, struct pt_regs *regs)
 {
-       trace_printk("handle mm fault vma=%p address=%lx flags=%x\n",
-                    vma, address, flags);
+       trace_printk("handle mm fault vma=%p address=%lx flags=%x regs=%p\n",
+                    vma, address, flags, regs);
 }
 
 extern void my_tramp(void *);
@@ -34,7 +36,9 @@ asm (
 "      pushq %rdi\n"
 "      pushq %rsi\n"
 "      pushq %rdx\n"
+"      pushq %rcx\n"
 "      call my_direct_func\n"
+"      popq %rcx\n"
 "      popq %rdx\n"
 "      popq %rsi\n"
 "      popq %rdi\n"
@@ -70,6 +74,30 @@ asm (
 
 #endif /* CONFIG_S390 */
 
+#ifdef CONFIG_ARM64
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp, @function\n"
+"      .globl          my_tramp\n"
+"   my_tramp:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #48\n"
+"      stp     x9, x30, [sp]\n"
+"      stp     x0, x1, [sp, #16]\n"
+"      stp     x2, x3, [sp, #32]\n"
+"      bl      my_direct_func\n"
+"      ldp     x30, x9, [sp]\n"
+"      ldp     x0, x1, [sp, #16]\n"
+"      ldp     x2, x3, [sp, #32]\n"
+"      add     sp, sp, #48\n"
+"      ret     x9\n"
+"      .size           my_tramp, .-my_tramp\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
 #ifdef CONFIG_LOONGARCH
 
 asm (
index 06879bb..32c477d 100644 (file)
@@ -3,7 +3,9 @@
 
 #include <linux/sched.h> /* for wake_up_process() */
 #include <linux/ftrace.h>
+#ifndef CONFIG_ARM64
 #include <asm/asm-offsets.h>
+#endif
 
 extern void my_direct_func(struct task_struct *p);
 
@@ -63,6 +65,28 @@ asm (
 
 #endif /* CONFIG_S390 */
 
+#ifdef CONFIG_ARM64
+
+asm (
+"      .pushsection    .text, \"ax\", @progbits\n"
+"      .type           my_tramp, @function\n"
+"      .globl          my_tramp\n"
+"   my_tramp:"
+"      hint    34\n" // bti    c
+"      sub     sp, sp, #32\n"
+"      stp     x9, x30, [sp]\n"
+"      str     x0, [sp, #16]\n"
+"      bl      my_direct_func\n"
+"      ldp     x30, x9, [sp]\n"
+"      ldr     x0, [sp, #16]\n"
+"      add     sp, sp, #32\n"
+"      ret     x9\n"
+"      .size           my_tramp, .-my_tramp\n"
+"      .popsection\n"
+);
+
+#endif /* CONFIG_ARM64 */
+
 #ifdef CONFIG_LOONGARCH
 
 asm (
index 6413342..82e3fb1 100644 (file)
@@ -264,6 +264,9 @@ $(obj)/%.lst: $(src)/%.c FORCE
 
 rust_allowed_features := new_uninit
 
+# `--out-dir` is required to avoid temporaries being created by `rustc` in the
+# current working directory, which may not be accessible in the out-of-tree
+# modules case.
 rust_common_cmd = \
        RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \
        -Zallow-features=$(rust_allowed_features) \
@@ -272,7 +275,7 @@ rust_common_cmd = \
        --extern alloc --extern kernel \
        --crate-type rlib -L $(objtree)/rust/ \
        --crate-name $(basename $(notdir $@)) \
-       --emit=dep-info=$(depfile)
+       --out-dir $(dir $@) --emit=dep-info=$(depfile)
 
 # `--emit=obj`, `--emit=asm` and `--emit=llvm-ir` imply a single codegen unit
 # will be used. We explicitly request `-Ccodegen-units=1` in any case, and
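The practical effect of the new flag is easiest to see on the expanded command line; roughly (paths illustrative, not the literal kernel invocation):

	rustc ... --emit=dep-info=.../%.d                 # before: temporaries dropped into $PWD
	rustc ... --out-dir .../ --emit=dep-info=.../%.d  # after: temporaries land beside the target

so a read-only working directory, as in external-module builds, no longer breaks dep-info generation.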
index 7aea900..8f7f842 100644 (file)
@@ -86,7 +86,11 @@ hostc_flags    = -Wp,-MMD,$(depfile) \
 hostcxx_flags  = -Wp,-MMD,$(depfile) \
                  $(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \
                  $(HOSTCXXFLAGS_$(target-stem).o)
-hostrust_flags = --emit=dep-info=$(depfile) \
+
+# `--out-dir` is required to avoid temporaries being created by `rustc` in the
+# current working directory, which may not be accessible in the out-of-tree
+# modules case.
+hostrust_flags = --out-dir $(dir $@) --emit=dep-info=$(depfile) \
                  $(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \
                  $(HOSTRUSTFLAGS_$(target-stem))
 
index 880fde1..a984114 100755 (executable)
@@ -7457,6 +7457,30 @@ sub process {
                        }
                }
 
+# Complain about RCU Tasks Trace used outside of BPF (and of course, RCU).
+               our $rcu_trace_funcs = qr{(?x:
+                       rcu_read_lock_trace |
+                       rcu_read_lock_trace_held |
+                       rcu_read_unlock_trace |
+                       call_rcu_tasks_trace |
+                       synchronize_rcu_tasks_trace |
+                       rcu_barrier_tasks_trace |
+                       rcu_request_urgent_qs_task
+               )};
+               our $rcu_trace_paths = qr{(?x:
+                       kernel/bpf/ |
+                       include/linux/bpf |
+                       net/bpf/ |
+                       kernel/rcu/ |
+                       include/linux/rcu
+               )};
+               if ($line =~ /\b($rcu_trace_funcs)\s*\(/) {
+                       if ($realfile !~ m{^$rcu_trace_paths}) {
+                               WARN("RCU_TASKS_TRACE",
+                                    "use of RCU tasks trace is incorrect outside BPF or core RCU code\n" . $herecurr);
+                       }
+               }
+
 # check for lockdep_set_novalidate_class
                if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ ||
                    $line =~ /__lockdep_no_validate__\s*\)/ ) {
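To see what the new rule catches: a hypothetical patch adding the following to a file outside the listed paths would now draw the RCU_TASKS_TRACE warning:

	/* drivers/foo/bar.c -- not under kernel/bpf/, net/bpf/, kernel/rcu/, ... */
	#include <linux/rcupdate_trace.h>

	static void bar_read(void)
	{
		rcu_read_lock_trace();          /* WARNING: RCU_TASKS_TRACE */
		/* ... read-side critical section ... */
		rcu_read_unlock_trace();
	}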
index 15ba565..a84cc57 100755 (executable)
@@ -19,7 +19,7 @@ _DEFAULT_OUTPUT = 'compile_commands.json'
 _DEFAULT_LOG_LEVEL = 'WARNING'
 
 _FILENAME_PATTERN = r'^\..*\.cmd$'
-_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.c) *(;|$)'
+_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.[cS]) *(;|$)'
 _VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
 # The tools/ directory adopts a different build system, and produces .cmd
 # files in a different format. Do not support it.
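With the `[cS]` character class, assembly objects are indexed too; a hypothetical .cmd entry such as

	savedcmd_arch/arm64/kernel/entry.o := gcc -Wp,-MMD,... -c -o arch/arm64/kernel/entry.o arch/arm64/kernel/entry.S

now matches, where the old pattern only accepted a trailing `.c`.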
diff --git a/scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci b/scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci
deleted file mode 100644 (file)
index 7c31231..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/// Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE
-/// for debugfs files.
-///
-//# Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file()
-//# imposes some significant overhead as compared to
-//# DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().
-//
-// Copyright (C): 2016 Nicolai Stange
-// Options: --no-includes
-//
-
-virtual context
-virtual patch
-virtual org
-virtual report
-
-@dsa@
-declarer name DEFINE_SIMPLE_ATTRIBUTE;
-identifier dsa_fops;
-expression dsa_get, dsa_set, dsa_fmt;
-position p;
-@@
-DEFINE_SIMPLE_ATTRIBUTE@p(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-
-@dcf@
-expression name, mode, parent, data;
-identifier dsa.dsa_fops;
-@@
-debugfs_create_file(name, mode, parent, data, &dsa_fops)
-
-
-@context_dsa depends on context && dcf@
-declarer name DEFINE_DEBUGFS_ATTRIBUTE;
-identifier dsa.dsa_fops;
-expression dsa.dsa_get, dsa.dsa_set, dsa.dsa_fmt;
-@@
-* DEFINE_SIMPLE_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-
-
-@patch_dcf depends on patch expression@
-expression name, mode, parent, data;
-identifier dsa.dsa_fops;
-@@
-- debugfs_create_file(name, mode, parent, data, &dsa_fops)
-+ debugfs_create_file_unsafe(name, mode, parent, data, &dsa_fops)
-
-@patch_dsa depends on patch_dcf && patch@
-identifier dsa.dsa_fops;
-expression dsa.dsa_get, dsa.dsa_set, dsa.dsa_fmt;
-@@
-- DEFINE_SIMPLE_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-+ DEFINE_DEBUGFS_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-
-
-@script:python depends on org && dcf@
-fops << dsa.dsa_fops;
-p << dsa.p;
-@@
-msg="%s should be defined with DEFINE_DEBUGFS_ATTRIBUTE" % (fops)
-coccilib.org.print_todo(p[0], msg)
-
-@script:python depends on report && dcf@
-fops << dsa.dsa_fops;
-p << dsa.p;
-@@
-msg="WARNING: %s should be defined with DEFINE_DEBUGFS_ATTRIBUTE" % (fops)
-coccilib.report.print_report(p[0], msg)
index 84c730d..1ae39b9 100644 (file)
@@ -440,4 +440,8 @@ static inline void debug_gimple_stmt(const_gimple s)
 #define SET_DECL_MODE(decl, mode)      DECL_MODE(decl) = (mode)
 #endif
 
+#if BUILDING_GCC_VERSION >= 14000
+#define last_stmt(x)                   last_nondebug_stmt(x)
+#endif
+
 #endif
index d387c93..653b92f 100644 (file)
@@ -129,6 +129,7 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
        ssize_t readlen;
        struct sym_entry *sym;
 
+       errno = 0;
        readlen = getline(buf, buf_len, in);
        if (readlen < 0) {
                if (errno) {
@@ -349,10 +350,10 @@ static void cleanup_symbol_name(char *s)
         * ASCII[_]   = 5f
         * ASCII[a-z] = 61,7a
         *
-        * As above, replacing '.' with '\0' does not affect the main sorting,
-        * but it helps us with subsorting.
+        * As above, replacing the first '.' in ".llvm." with '\0' does not
+        * affect the main sorting, but it helps us with subsorting.
         */
-       p = strchr(s, '.');
+       p = strstr(s, ".llvm.");
        if (p)
                *p = '\0';
 }
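The narrower match matters for symbols that legitimately contain dots. Illustrative names (hypothetical):

	/* "foo.llvm.123456"  -> clipped at ".llvm.", sorts/subsorts as "foo"   */
	/* "bar.cold"         -> no ".llvm." substring, now left intact, where  */
	/*                       the old strchr(s, '.') would also have clipped */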
index 17adabf..9709aca 100644 (file)
@@ -636,7 +636,7 @@ void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data)
 {
        GtkWidget *dialog;
        const gchar *intro_text =
-           "Welcome to gkc, the GTK+ graphical configuration tool\n"
+           "Welcome to gconfig, the GTK+ graphical configuration tool.\n"
            "For each option, a blank box indicates the feature is disabled, a\n"
            "check indicates it is enabled, and a dot indicates that it is to\n"
            "be compiled as a module.  Clicking on the box will cycle through the three states.\n"
@@ -647,10 +647,7 @@ void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data)
            "Although there is no cross reference yet to help you figure out\n"
            "what other options must be enabled to support the option you\n"
            "are interested in, you can still view the help of a grayed-out\n"
-           "option.\n"
-           "\n"
-           "Toggling Show Debug Info under the Options menu will show \n"
-           "the dependencies, which you can then match by examining other options.";
+           "option.";
 
        dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
                                        GTK_DIALOG_DESTROY_WITH_PARENT,
@@ -667,7 +664,7 @@ void on_about1_activate(GtkMenuItem * menuitem, gpointer user_data)
 {
        GtkWidget *dialog;
        const gchar *about_text =
-           "gkc is copyright (c) 2002 Romain Lievin <roms@lpg.ticalc.org>.\n"
+           "gconfig is copyright (c) 2002 Romain Lievin <roms@lpg.ticalc.org>.\n"
              "Based on the source code from Roman Zippel.\n";
 
        dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
@@ -685,7 +682,7 @@ void on_license1_activate(GtkMenuItem * menuitem, gpointer user_data)
 {
        GtkWidget *dialog;
        const gchar *license_text =
-           "gkc is released under the terms of the GNU GPL v2.\n"
+           "gconfig is released under the terms of the GNU GPL v2.\n"
              "For more information, please see the source code or\n"
              "visit http://www.fsf.org/licenses/licenses.html\n";
 
index fc7ba95..855c486 100644 (file)
@@ -1541,7 +1541,6 @@ temeprature||temperature
 temorary||temporary
 temproarily||temporarily
 temperture||temperature
-thead||thread
 theads||threads
 therfore||therefore
 thier||their
index 0f29596..2cff851 100644 (file)
@@ -279,6 +279,29 @@ config ZERO_CALL_USED_REGS
 
 endmenu
 
+menu "Hardening of kernel data structures"
+
+config LIST_HARDENED
+       bool "Check integrity of linked list manipulation"
+       help
+         Minimal integrity checking in the linked-list manipulation routines
+         to catch memory corruptions that are not guaranteed to result in an
+         immediate access fault.
+
+         If unsure, say N.
+
+config BUG_ON_DATA_CORRUPTION
+       bool "Trigger a BUG when data corruption is detected"
+       select LIST_HARDENED
+       help
+         Select this option if the kernel should BUG when it encounters
+         data corruption in kernel memory structures when they get checked
+         for validity.
+
+         If unsure, say N.
+
+endmenu
+
 config CC_HAS_RANDSTRUCT
        def_bool $(cc-option,-frandomize-layout-seed-file=/dev/null)
        # Randstruct was first added in Clang 15, but it isn't safe to use until
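For a sense of what LIST_HARDENED adds: the list primitives gain cheap linkage sanity checks before any pointers are written. A minimal sketch of the style of check (the predicate name and exact conditions are illustrative, not the kernel's implementation):

	static inline bool list_add_valid(struct list_head *new,
					  struct list_head *prev,
					  struct list_head *next)
	{
		/* neighbours must point at each other, and the new node must
		 * not already be one of them; anything else is corruption */
		return next->prev == prev && prev->next == next &&
		       new != prev && new != next;
	}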
index db7a51a..bd6a910 100644 (file)
@@ -226,7 +226,7 @@ static int __aafs_setup_d_inode(struct inode *dir, struct dentry *dentry,
 
        inode->i_ino = get_next_ino();
        inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_private = data;
        if (S_ISDIR(mode)) {
                inode->i_op = iops ? iops : &simple_dir_inode_operations;
@@ -1554,8 +1554,11 @@ void __aafs_profile_migrate_dents(struct aa_profile *old,
 
        for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
                new->dents[i] = old->dents[i];
-               if (new->dents[i])
-                       new->dents[i]->d_inode->i_mtime = current_time(new->dents[i]->d_inode);
+               if (new->dents[i]) {
+                       struct inode *inode = d_inode(new->dents[i]);
+
+                       inode->i_mtime = inode_set_ctime_current(inode);
+               }
                old->dents[i] = NULL;
        }
 }
@@ -2540,7 +2543,7 @@ static int aa_mk_null_file(struct dentry *parent)
 
        inode->i_ino = get_next_ino();
        inode->i_mode = S_IFCHR | S_IRUGO | S_IWUGO;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO,
                           MKDEV(MEM_MAJOR, 3));
        d_instantiate(dentry, inode);
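These apparmorfs hunks (and the securityfs/selinuxfs ones below) are instances of the same tree-wide conversion: i_ctime is no longer assigned directly; inode_set_ctime_current() stores the current time through the new ctime accessor and returns it, so the chained atime/mtime assignment keeps working:

	/* old: inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); */
	inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);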
index 694fb7a..8b88460 100644 (file)
@@ -86,10 +86,13 @@ void __aa_loaddata_update(struct aa_loaddata *data, long revision)
 
        data->revision = revision;
        if ((data->dents[AAFS_LOADDATA_REVISION])) {
-               d_inode(data->dents[AAFS_LOADDATA_DIR])->i_mtime =
-                       current_time(d_inode(data->dents[AAFS_LOADDATA_DIR]));
-               d_inode(data->dents[AAFS_LOADDATA_REVISION])->i_mtime =
-                       current_time(d_inode(data->dents[AAFS_LOADDATA_REVISION]));
+               struct inode *inode;
+
+               inode = d_inode(data->dents[AAFS_LOADDATA_DIR]);
+               inode->i_mtime = inode_set_ctime_current(inode);
+
+               inode = d_inode(data->dents[AAFS_LOADDATA_REVISION]);
+               inode->i_mtime = inode_set_ctime_current(inode);
        }
 }
 
index 6c32693..3aa75ff 100644 (file)
@@ -145,7 +145,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
 
        inode->i_ino = get_next_ino();
        inode->i_mode = mode;
-       inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+       inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
        inode->i_private = data;
        if (S_ISDIR(mode)) {
                inode->i_op = &simple_dir_inode_operations;
index c9b3bd8..7a0420c 100644 (file)
@@ -68,7 +68,7 @@ enum policy_rule_list { IMA_DEFAULT_POLICY = 1, IMA_CUSTOM_POLICY };
 
 struct ima_rule_opt_list {
        size_t count;
-       char *items[];
+       char *items[] __counted_by(count);
 };
 
 /*
@@ -342,6 +342,7 @@ static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src)
                kfree(src_copy);
                return ERR_PTR(-ENOMEM);
        }
+       opt_list->count = count;
 
        /*
         * strsep() has already replaced all instances of '|' with '\0',
@@ -357,7 +358,6 @@ static struct ima_rule_opt_list *ima_alloc_rule_opt_list(const substring_t *src)
                opt_list->items[i] = cur;
                cur = strchr(cur, '\0') + 1;
        }
-       opt_list->count = count;
 
        return opt_list;
 }
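The reordering matters because `__counted_by(count)` lets FORTIFY_SOURCE and UBSAN bounds-check `items[]` against `count` at run time, so `count` must hold its final value before the array is first indexed. The general shape (`parse_next()` is a hypothetical helper):

	opt_list = kzalloc(struct_size(opt_list, items, count), GFP_KERNEL);
	if (!opt_list)
		return ERR_PTR(-ENOMEM);
	opt_list->count = count;                   /* set the bound first ... */
	for (i = 0; i < count; i++)
		opt_list->items[i] = parse_next(); /* ... then fill the array */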
index e769dcb..c7c381a 100644 (file)
@@ -22,8 +22,8 @@ static int __init load_ipl_certs(void)
 
        if (!ipl_cert_list_addr)
                return 0;
-       /* Copy the certificates to the system keyring */
-       ptr = (void *) ipl_cert_list_addr;
+       /* Copy the certificates to the platform keyring */
+       ptr = __va(ipl_cert_list_addr);
        end = ptr + ipl_cert_list_size;
        while ((void *) ptr < end) {
                len = *(unsigned int *) ptr;
index d54f73c..19be69f 100644 (file)
@@ -980,14 +980,19 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
        ret = -EACCES;
        down_write(&key->sem);
 
-       if (!capable(CAP_SYS_ADMIN)) {
+       {
+               bool is_privileged_op = false;
+
                /* only the sysadmin can chown a key to some other UID */
                if (user != (uid_t) -1 && !uid_eq(key->uid, uid))
-                       goto error_put;
+                       is_privileged_op = true;
 
                /* only the sysadmin can set the key's GID to a group other
                 * than one of those that the current process subscribes to */
                if (group != (gid_t) -1 && !gid_eq(gid, key->gid) && !in_group_p(gid))
+                       is_privileged_op = true;
+
+               if (is_privileged_op && !capable(CAP_SYS_ADMIN))
                        goto error_put;
        }
 
@@ -1088,7 +1093,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
        down_write(&key->sem);
 
        /* if we're not the sysadmin, we can only change a key that we own */
-       if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) {
+       if (uid_eq(key->uid, current_fsuid()) || capable(CAP_SYS_ADMIN)) {
                key->perm = perm;
                notify_key(key, NOTIFY_KEY_SETATTR, 0);
                ret = 0;
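Both keyctl hunks apply the same rule of thumb: capable() is not a pure predicate -- it can set PF_SUPERPRIV and emit audit records -- so it should be consulted only once a genuinely privileged operation is actually requested, and after the cheap ownership tests. In outline (condition names illustrative):

	bool is_privileged_op = wants_other_uid || wants_foreign_gid;

	if (is_privileged_op && !capable(CAP_SYS_ADMIN))
		return -EACCES;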
index 07a0ef2..a7673ad 100644 (file)
@@ -401,17 +401,21 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
        set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
 
        if (dest_keyring) {
-               ret = __key_link_lock(dest_keyring, &ctx->index_key);
+               ret = __key_link_lock(dest_keyring, &key->index_key);
                if (ret < 0)
                        goto link_lock_failed;
-               ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
-               if (ret < 0)
-                       goto link_prealloc_failed;
        }
 
-       /* attach the key to the destination keyring under lock, but we do need
+       /*
+        * Attach the key to the destination keyring under lock, but we do need
         * to do another check just in case someone beat us to it whilst we
-        * waited for locks */
+        * waited for locks.
+        *
+        * The caller might specify a comparison function which looks for keys
+        * that do not exactly match but are still equivalent from the caller's
+        * perspective. The __key_link_begin() operation must be done only after
+        * an actual key is determined.
+        */
        mutex_lock(&key_construction_mutex);
 
        rcu_read_lock();
@@ -420,12 +424,16 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
        if (!IS_ERR(key_ref))
                goto key_already_present;
 
-       if (dest_keyring)
+       if (dest_keyring) {
+               ret = __key_link_begin(dest_keyring, &key->index_key, &edit);
+               if (ret < 0)
+                       goto link_alloc_failed;
                __key_link(dest_keyring, key, &edit);
+       }
 
        mutex_unlock(&key_construction_mutex);
        if (dest_keyring)
-               __key_link_end(dest_keyring, &ctx->index_key, edit);
+               __key_link_end(dest_keyring, &key->index_key, edit);
        mutex_unlock(&user->cons_lock);
        *_key = key;
        kleave(" = 0 [%d]", key_serial(key));
@@ -438,10 +446,13 @@ key_already_present:
        mutex_unlock(&key_construction_mutex);
        key = key_ref_to_ptr(key_ref);
        if (dest_keyring) {
+               ret = __key_link_begin(dest_keyring, &key->index_key, &edit);
+               if (ret < 0)
+                       goto link_alloc_failed_unlocked;
                ret = __key_link_check_live_key(dest_keyring, key);
                if (ret == 0)
                        __key_link(dest_keyring, key, &edit);
-               __key_link_end(dest_keyring, &ctx->index_key, edit);
+               __key_link_end(dest_keyring, &key->index_key, edit);
                if (ret < 0)
                        goto link_check_failed;
        }
@@ -456,8 +467,10 @@ link_check_failed:
        kleave(" = %d [linkcheck]", ret);
        return ret;
 
-link_prealloc_failed:
-       __key_link_end(dest_keyring, &ctx->index_key, edit);
+link_alloc_failed:
+       mutex_unlock(&key_construction_mutex);
+link_alloc_failed_unlocked:
+       __key_link_end(dest_keyring, &key->index_key, edit);
 link_lock_failed:
        mutex_unlock(&user->cons_lock);
        key_put(key);
index b72b82b..b348e16 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/sysctl.h>
 #include "internal.h"
 
-struct ctl_table key_sysctls[] = {
+static struct ctl_table key_sysctls[] = {
        {
                .procname = "maxkeys",
                .data = &key_quota_maxkeys,
index 2b2c8eb..bc700f8 100644 (file)
@@ -186,7 +186,7 @@ int tpm2_key_priv(void *context, size_t hdrlen,
 }
 
 /**
- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
+ * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
  *
  * @buf: an allocated tpm_buf instance
  * @session_handle: session handle
index ebae964..a9d4045 100644 (file)
@@ -336,6 +336,7 @@ static int read_trusted_verity_root_digests(unsigned int fd)
                        rc = -ENOMEM;
                        goto err;
                }
+               trd->len = len;
 
                if (hex2bin(trd->data, d, len)) {
                        kfree(trd);
@@ -343,8 +344,6 @@ static int read_trusted_verity_root_digests(unsigned int fd)
                        goto err;
                }
 
-               trd->len = len;
-
                list_add_tail(&trd->node, &dm_verity_loadpin_trusted_root_digests);
        }
 
index b720424..549104a 100644 (file)
@@ -1139,6 +1139,20 @@ void security_bprm_committed_creds(struct linux_binprm *bprm)
 }
 
 /**
+ * security_fs_context_submount() - Initialise fc->security
+ * @fc: new filesystem context
+ * @reference: dentry reference for submount/remount
+ *
+ * Fill out the ->security field for a new fs_context.
+ *
+ * Return: Returns 0 on success or negative error code on failure.
+ */
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference)
+{
+       return call_int_hook(fs_context_submount, 0, fc, reference);
+}
+
+/**
  * security_fs_context_dup() - Duplicate a fs_context LSM blob
  * @fc: destination filesystem context
  * @src_fc: source filesystem context
index d06e350..afd6637 100644 (file)
@@ -2745,6 +2745,27 @@ static int selinux_umount(struct vfsmount *mnt, int flags)
                                   FILESYSTEM__UNMOUNT, NULL);
 }
 
+static int selinux_fs_context_submount(struct fs_context *fc,
+                                  struct super_block *reference)
+{
+       const struct superblock_security_struct *sbsec;
+       struct selinux_mnt_opts *opts;
+
+       opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+       if (!opts)
+               return -ENOMEM;
+
+       sbsec = selinux_superblock(reference);
+       if (sbsec->flags & FSCONTEXT_MNT)
+               opts->fscontext_sid = sbsec->sid;
+       if (sbsec->flags & CONTEXT_MNT)
+               opts->context_sid = sbsec->mntpoint_sid;
+       if (sbsec->flags & DEFCONTEXT_MNT)
+               opts->defcontext_sid = sbsec->def_sid;
+       fc->security = opts;
+       return 0;
+}
+
 static int selinux_fs_context_dup(struct fs_context *fc,
                                  struct fs_context *src_fc)
 {
@@ -7182,6 +7203,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
        /*
         * PUT "CLONING" (ACCESSING + ALLOCATING) HOOKS HERE
         */
+       LSM_HOOK_INIT(fs_context_submount, selinux_fs_context_submount),
        LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup),
        LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param),
        LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts),
index bad1f6b..9dafb6f 100644 (file)
@@ -1197,7 +1197,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
 
        if (ret) {
                ret->i_mode = mode;
-               ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+               ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
        }
        return ret;
 }
index 31b08b3..dc90486 100644 (file)
@@ -2005,6 +2005,7 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
                if (!datum)
                        goto out;
 
+               datum->next = NULL;
                *dst = datum;
 
                /* ebitmap_read() will at least init the bitmap */
@@ -2017,7 +2018,6 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
                        goto out;
 
                datum->otype = le32_to_cpu(buf[0]);
-               datum->next = NULL;
 
                dst = &datum->next;
        }
index 6e270cf..a8201cf 100644 (file)
@@ -615,6 +615,56 @@ out_opt_err:
 }
 
 /**
+ * smack_fs_context_submount - Initialise security data for a filesystem context
+ * @fc: The filesystem context.
+ * @reference: reference superblock
+ *
+ * Returns 0 on success or -ENOMEM on error.
+ */
+static int smack_fs_context_submount(struct fs_context *fc,
+                                struct super_block *reference)
+{
+       struct superblock_smack *sbsp;
+       struct smack_mnt_opts *ctx;
+       struct inode_smack *isp;
+
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+       fc->security = ctx;
+
+       sbsp = smack_superblock(reference);
+       isp = smack_inode(reference->s_root->d_inode);
+
+       if (sbsp->smk_default) {
+               ctx->fsdefault = kstrdup(sbsp->smk_default->smk_known, GFP_KERNEL);
+               if (!ctx->fsdefault)
+                       return -ENOMEM;
+       }
+
+       if (sbsp->smk_floor) {
+               ctx->fsfloor = kstrdup(sbsp->smk_floor->smk_known, GFP_KERNEL);
+               if (!ctx->fsfloor)
+                       return -ENOMEM;
+       }
+
+       if (sbsp->smk_hat) {
+               ctx->fshat = kstrdup(sbsp->smk_hat->smk_known, GFP_KERNEL);
+               if (!ctx->fshat)
+                       return -ENOMEM;
+       }
+
+       if (isp->smk_flags & SMK_INODE_TRANSMUTE) {
+               if (sbsp->smk_root) {
+                       ctx->fstransmute = kstrdup(sbsp->smk_root->smk_known, GFP_KERNEL);
+                       if (!ctx->fstransmute)
+                               return -ENOMEM;
+               }
+       }
+       return 0;
+}
+
+/**
  * smack_fs_context_dup - Duplicate the security data on fs_context duplication
  * @fc: The new filesystem context.
  * @src_fc: The source filesystem context being duplicated.
@@ -4876,6 +4926,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = {
        LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme),
        LSM_HOOK_INIT(syslog, smack_syslog),
 
+       LSM_HOOK_INIT(fs_context_submount, smack_fs_context_submount),
        LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup),
        LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param),
 
index 9b80f82..f3f14ff 100644 (file)
@@ -149,6 +149,7 @@ int snd_seq_create_port(struct snd_seq_client *client, int port,
        write_lock_irq(&client->ports_lock);
        list_for_each_entry(p, &client->ports_list_head, list) {
                if (p->addr.port == port) {
+                       kfree(new_port);
                        num = -EBUSY;
                        goto unlock;
                }
index fe21c80..f26a181 100644 (file)
@@ -298,8 +298,7 @@ static void update_group_attrs(struct seq_ump_client *client)
        }
 
        list_for_each_entry(fb, &client->ump->block_list, list) {
-               if (fb->info.first_group < 0 ||
-                   fb->info.first_group + fb->info.num_groups > SNDRV_UMP_MAX_GROUPS)
+               if (fb->info.first_group + fb->info.num_groups > SNDRV_UMP_MAX_GROUPS)
                        break;
                group = &client->groups[fb->info.first_group];
                for (i = 0; i < fb->info.num_groups; i++, group++) {
index 2ae912a..291e7fe 100644 (file)
@@ -110,8 +110,6 @@ struct pcmtst_buf_iter {
        struct timer_list timer_instance;
 };
 
-static struct pcmtst *pcmtst;
-
 static struct snd_pcm_hardware snd_pcmtst_hw = {
        .info = (SNDRV_PCM_INFO_INTERLEAVED |
                 SNDRV_PCM_INFO_BLOCK_TRANSFER |
@@ -552,6 +550,7 @@ _err_free_chip:
 static int pcmtst_probe(struct platform_device *pdev)
 {
        struct snd_card *card;
+       struct pcmtst *pcmtst;
        int err;
 
        err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
@@ -573,13 +572,16 @@ static int pcmtst_probe(struct platform_device *pdev)
        if (err < 0)
                return err;
 
+       platform_set_drvdata(pdev, pcmtst);
+
        return 0;
 }
 
-static int pdev_remove(struct platform_device *dev)
+static void pdev_remove(struct platform_device *pdev)
 {
+       struct pcmtst *pcmtst = platform_get_drvdata(pdev);
+
        snd_pcmtst_free(pcmtst);
-       return 0;
 }
 
 static struct platform_device pcmtst_pdev = {
@@ -589,7 +591,7 @@ static struct platform_device pcmtst_pdev = {
 
 static struct platform_driver pcmtst_pdrv = {
        .probe =        pcmtst_probe,
-       .remove =       pdev_remove,
+       .remove_new =   pdev_remove,
        .driver =       {
                .name = "pcmtest",
        },
index b288874..36b411d 100644 (file)
@@ -550,6 +550,10 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0C50, "Dolphin", CS8409_DOLPHIN),
        SND_PCI_QUIRK(0x1028, 0x0C51, "Dolphin", CS8409_DOLPHIN),
        SND_PCI_QUIRK(0x1028, 0x0C52, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C73, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C75, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C7D, "Dolphin", CS8409_DOLPHIN),
+       SND_PCI_QUIRK(0x1028, 0x0C7F, "Dolphin", CS8409_DOLPHIN),
        {} /* terminator */
 };
 
index e2f8b60..dc7b7a4 100644 (file)
@@ -122,6 +122,7 @@ struct alc_spec {
        unsigned int ultra_low_power:1;
        unsigned int has_hs_key:1;
        unsigned int no_internal_mic_pin:1;
+       unsigned int en_3kpull_low:1;
 
        /* for PLL fix */
        hda_nid_t pll_nid;
@@ -3622,6 +3623,7 @@ static void alc256_shutup(struct hda_codec *codec)
        if (!hp_pin)
                hp_pin = 0x21;
 
+       alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
        hp_pin_sense = snd_hda_jack_detect(codec, hp_pin);
 
        if (hp_pin_sense)
@@ -3638,8 +3640,7 @@ static void alc256_shutup(struct hda_codec *codec)
        /* If the 3k pulldown control is disabled for alc257, mic detection will not work
         * correctly when booting with a headset plugged in, so skip setting it for alc257.
         */
-       if (codec->core.vendor_id != 0x10ec0236 &&
-           codec->core.vendor_id != 0x10ec0257)
+       if (spec->en_3kpull_low)
                alc_update_coef_idx(codec, 0x46, 0, 3 << 12);
 
        if (!spec->no_shutup_pins)
@@ -4623,6 +4624,21 @@ static void alc236_fixup_hp_mute_led_coefbit(struct hda_codec *codec,
        }
 }
 
+static void alc236_fixup_hp_mute_led_coefbit2(struct hda_codec *codec,
+                                         const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               spec->mute_led_polarity = 0;
+               spec->mute_led_coef.idx = 0x07;
+               spec->mute_led_coef.mask = 1;
+               spec->mute_led_coef.on = 1;
+               spec->mute_led_coef.off = 0;
+               snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set);
+       }
+}
+
 /* turn on/off mic-mute LED per capture hook by coef bit */
 static int coef_micmute_led_set(struct led_classdev *led_cdev,
                                enum led_brightness brightness)
@@ -7143,6 +7159,7 @@ enum {
        ALC285_FIXUP_HP_GPIO_LED,
        ALC285_FIXUP_HP_MUTE_LED,
        ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED,
+       ALC236_FIXUP_HP_MUTE_LED_COEFBIT2,
        ALC236_FIXUP_HP_GPIO_LED,
        ALC236_FIXUP_HP_MUTE_LED,
        ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
@@ -7213,6 +7230,7 @@ enum {
        ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN,
        ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
        ALC236_FIXUP_DELL_DUAL_CODECS,
+       ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -8632,6 +8650,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc285_fixup_hp_spectre_x360_mute_led,
        },
+       [ALC236_FIXUP_HP_MUTE_LED_COEFBIT2] = {
+           .type = HDA_FIXUP_FUNC,
+           .v.func = alc236_fixup_hp_mute_led_coefbit2,
+       },
        [ALC236_FIXUP_HP_GPIO_LED] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc236_fixup_hp_gpio_led,
@@ -9145,8 +9167,6 @@ static const struct hda_fixup alc269_fixups[] = {
        [ALC287_FIXUP_CS35L41_I2C_2] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cs35l41_fixup_i2c_two,
-               .chained = true,
-               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
        },
        [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = {
                .type = HDA_FIXUP_FUNC,
@@ -9283,6 +9303,12 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
        },
+       [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs35l41_fixup_i2c_two,
+               .chained = true,
+               .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -9393,6 +9419,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
        SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
        SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
+       SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -9507,6 +9540,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
        SND_PCI_QUIRK(0x103c, 0x8812, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
+       SND_PCI_QUIRK(0x103c, 0x881d, "HP 250 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
        SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
@@ -9516,6 +9550,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+       SND_PCI_QUIRK(0x103c, 0x887a, "HP Laptop 15s-eq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
        SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
@@ -9581,7 +9616,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8b96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
        SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
        SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
-       SND_PCI_QUIRK(0x103c, 0x8c26, "HP HP EliteBook 800G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c46, "HP EliteBook 830 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c47, "HP EliteBook 840 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c48, "HP EliteBook 860 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c49, "HP Elite x360 830 2-in-1 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -9636,6 +9677,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
        SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
+       SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS ROG Strix G17 2023 (G713PV)", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
        SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2),
@@ -9727,6 +9769,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x51b3, "Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -9810,14 +9853,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
        SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
        SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
-       SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+       SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
        SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
        SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -10600,6 +10643,7 @@ static int patch_alc269(struct hda_codec *codec)
        spec = codec->spec;
        spec->gen.shared_mic_vref_pin = 0x18;
        codec->power_save_node = 0;
+       spec->en_3kpull_low = true;
 
 #ifdef CONFIG_PM
        codec->patch_ops.suspend = alc269_suspend;
@@ -10682,12 +10726,16 @@ static int patch_alc269(struct hda_codec *codec)
                spec->shutup = alc256_shutup;
                spec->init_hook = alc256_init;
                spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
+               if (codec->core.vendor_id == 0x10ec0236 &&
+                   codec->bus->pci->vendor != PCI_VENDOR_ID_AMD)
+                       spec->en_3kpull_low = false;
                break;
        case 0x10ec0257:
                spec->codec_variant = ALC269_TYPE_ALC257;
                spec->shutup = alc256_shutup;
                spec->init_hook = alc256_init;
                spec->gen.mixer_nid = 0;
+               spec->en_3kpull_low = false;
                break;
        case 0x10ec0215:
        case 0x10ec0245:
index b033bd2..48444dd 100644 (file)
@@ -152,8 +152,8 @@ static inline int snd_ymfpci_create_gameport(struct snd_ymfpci *chip, int dev, i
 void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { }
 #endif /* SUPPORT_JOYSTICK */
 
-static int snd_card_ymfpci_probe(struct pci_dev *pci,
-                                const struct pci_device_id *pci_id)
+static int __snd_card_ymfpci_probe(struct pci_dev *pci,
+                                  const struct pci_device_id *pci_id)
 {
        static int dev;
        struct snd_card *card;
@@ -348,6 +348,12 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
        return 0;
 }
 
+static int snd_card_ymfpci_probe(struct pci_dev *pci,
+                                const struct pci_device_id *pci_id)
+{
+       return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id));
+}
+
 static struct pci_driver ymfpci_driver = {
        .name = KBUILD_MODNAME,
        .id_table = snd_ymfpci_ids,
index 5f2119f..12a176a 100644 (file)
@@ -173,7 +173,7 @@ int snd_amd_acp_find_config(struct pci_dev *pci);
 
 static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int direction)
 {
-       u64 byte_count, low = 0, high = 0;
+       u64 byte_count = 0, low = 0, high = 0;
 
        if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
                switch (dai_id) {
@@ -191,7 +191,7 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
                        break;
                default:
                        dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
-                       return -EINVAL;
+                       goto POINTER_RETURN_BYTES;
                }
        } else {
                switch (dai_id) {
@@ -213,12 +213,13 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
                        break;
                default:
                        dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
-                       return -EINVAL;
+                       goto POINTER_RETURN_BYTES;
                }
        }
        /* Get 64 bit value from two 32 bit registers */
        byte_count = (high << 32) | low;
 
+POINTER_RETURN_BYTES:
        return byte_count;
 }
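The switch from `return -EINVAL` to the goto is not cosmetic: this helper feeds the PCM pointer path, and its u64 return value is consumed as a byte count, so a negative errno would wrap into an enormous position:

	u64 count = (u64)-EINVAL;	/* 0xffffffffffffffea -- a bogus byte count */

Returning the zero-initialized count instead keeps the stream position sane when an invalid dai_id slips through.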
 
index e96e6dc..8b853b8 100644 (file)
 #define ACP63_SDW0_DMA_MAX_STREAMS     6
 #define ACP63_SDW1_DMA_MAX_STREAMS     2
 #define ACP_P1_AUDIO_TX_THRESHOLD      6
+
+/*
+ * The entries below describe the SDW0 instance DMA stream id to DMA IRQ bit
+ * mapping in the ACP_EXTERNAL_INTR_CNTL register.
+ * Stream id           IRQ Bit
+ * 0 (SDW0_AUDIO0_TX)  28
+ * 1 (SDW0_AUDIO1_TX)  26
+ * 2 (SDW0_AUDIO2_TX)  24
+ * 3 (SDW0_AUDIO0_RX)  27
+ * 4 (SDW0_AUDIO1_RX)  25
+ * 5 (SDW0_AUDIO2_RX)  23
+ */
 #define SDW0_DMA_TX_IRQ_MASK(i)        (ACP_AUDIO0_TX_THRESHOLD - (2 * (i)))
-#define SDW0_DMA_RX_IRQ_MASK(i)        (ACP_AUDIO0_RX_THRESHOLD - (2 * (i)))
+#define SDW0_DMA_RX_IRQ_MASK(i)        (ACP_AUDIO0_RX_THRESHOLD - (2 * ((i) - 3)))
+
+/*
+ * The entries below describe the SDW1 instance DMA stream id to DMA IRQ bit
+ * mapping in the ACP_EXTERNAL_INTR_CNTL1 register.
+ * Stream id           IRQ Bit
+ * 0 (SDW1_AUDIO1_TX)  6
+ * 1 (SDW1_AUDIO1_RX)  5
+ */
 #define SDW1_DMA_IRQ_MASK(i)   (ACP_P1_AUDIO_TX_THRESHOLD - (i))
 
 #define ACP_DELAY_US           5
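Checking the macros against the tables above (this assumes ACP_AUDIO0_TX_THRESHOLD == 28 and ACP_AUDIO0_RX_THRESHOLD == 27, which the mapping implies):

	SDW0 TX: 28 - 2*i        -> i = 0,1,2 gives bits 28, 26, 24
	SDW0 RX: 27 - 2*(i - 3)  -> i = 3,4,5 gives bits 27, 25, 23
	SDW1:     6 - i          -> i = 0,1   gives bits  6,  5

which is why the RX macro needed the `(i) - 3` bias: RX stream ids start at 3, so the old `27 - 2*i` formula drifted off the table from the first RX stream onward.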
index 5b46dc8..4af3c36 100644 (file)
@@ -257,7 +257,7 @@ static int sdw_amd_scan_controller(struct device *dev)
                                             &sdw_manager_bitmap, 1);
 
        if (ret) {
-               dev_err(dev, "Failed to read mipi-sdw-manager-list: %d\n", ret);
+               dev_dbg(dev, "Failed to read mipi-sdw-manager-list: %d\n", ret);
                return -EINVAL;
        }
        count = hweight32(sdw_manager_bitmap);
@@ -641,7 +641,7 @@ static int snd_acp63_probe(struct pci_dev *pci,
        ret = get_acp63_device_config(val, pci, adata);
        /* ACP PCI driver probe should continue even if PDM or SoundWire devices are not found */
        if (ret) {
-               dev_err(&pci->dev, "get acp device config failed:%d\n", ret);
+               dev_dbg(&pci->dev, "get acp device config failed:%d\n", ret);
                goto skip_pdev_creation;
        }
        ret = create_acp63_platform_devs(pci, adata, addr);
index ade130a..324c80f 100644 (file)
@@ -30,7 +30,7 @@ static struct sdw_dma_ring_buf_reg sdw0_dma_ring_buf_reg[ACP63_SDW0_DMA_MAX_STRE
         ACP_AUDIO2_TX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO2_TX_LINEARPOSITIONCNTR_HIGH},
        {ACP_AUDIO0_RX_DMA_SIZE, ACP_AUDIO0_RX_FIFOADDR, ACP_AUDIO0_RX_FIFOSIZE,
         ACP_AUDIO0_RX_RINGBUFSIZE, ACP_AUDIO0_RX_RINGBUFADDR, ACP_AUDIO0_RX_INTR_WATERMARK_SIZE,
-        ACP_AUDIO0_TX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO0_TX_LINEARPOSITIONCNTR_HIGH},
+        ACP_AUDIO0_RX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO0_RX_LINEARPOSITIONCNTR_HIGH},
        {ACP_AUDIO1_RX_DMA_SIZE, ACP_AUDIO1_RX_FIFOADDR, ACP_AUDIO1_RX_FIFOSIZE,
         ACP_AUDIO1_RX_RINGBUFSIZE, ACP_AUDIO1_RX_RINGBUFADDR, ACP_AUDIO1_RX_INTR_WATERMARK_SIZE,
         ACP_AUDIO1_RX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO1_RX_LINEARPOSITIONCNTR_HIGH},
@@ -39,6 +39,11 @@ static struct sdw_dma_ring_buf_reg sdw0_dma_ring_buf_reg[ACP63_SDW0_DMA_MAX_STRE
         ACP_AUDIO2_RX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO2_RX_LINEARPOSITIONCNTR_HIGH}
 };
 
+/*
+ * The SDW1 instance supports one TX stream and one RX stream.
+ * DMA register programming for the SDW1 TX/RX streams uses the ACP_P1_AUDIO1
+ * register set, as per the hardware register documentation.
+ */
 static struct sdw_dma_ring_buf_reg sdw1_dma_ring_buf_reg[ACP63_SDW1_DMA_MAX_STREAMS] =  {
        {ACP_P1_AUDIO1_TX_DMA_SIZE, ACP_P1_AUDIO1_TX_FIFOADDR, ACP_P1_AUDIO1_TX_FIFOSIZE,
         ACP_P1_AUDIO1_TX_RINGBUFSIZE, ACP_P1_AUDIO1_TX_RINGBUFADDR,
@@ -59,6 +64,12 @@ static u32 sdw0_dma_enable_reg[ACP63_SDW0_DMA_MAX_STREAMS] = {
        ACP_SW0_AUDIO2_RX_EN,
 };
 
+/*
+ * The SDW1 instance supports one TX stream and one RX stream.
+ * DMA enable register programming for the SDW1 TX/RX streams uses the
+ * ACP_SW1_AUDIO1_TX_EN and ACP_SW1_AUDIO1_RX_EN registers, as per the
+ * hardware register documentation.
+ */
 static u32 sdw1_dma_enable_reg[ACP63_SDW1_DMA_MAX_STREAMS] = {
        ACP_SW1_AUDIO1_TX_EN,
        ACP_SW1_AUDIO1_RX_EN,
@@ -307,12 +318,13 @@ static u64 acp63_sdw_get_byte_count(struct acp_sdw_dma_stream *stream, void __io
                pos_high_reg = sdw1_dma_ring_buf_reg[stream->stream_id].pos_high_reg;
                break;
        default:
-               return -EINVAL;
+               goto POINTER_RETURN_BYTES;
        }
        if (pos_low_reg) {
                byte_count.bcount.high = readl(acp_base + pos_high_reg);
                byte_count.bcount.low = readl(acp_base + pos_low_reg);
        }
+POINTER_RETURN_BYTES:
        return byte_count.bytescount;
 }
 
index a2fe3bd..b304b35 100644 (file)
@@ -217,7 +217,7 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
                .driver_data = &acp6x_card,
                .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "82"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
                }
        },
        {
@@ -251,6 +251,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
        {
                .driver_data = &acp6x_card,
                .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"),
+               }
+       },
+       {
+               .driver_data = &acp6x_card,
+               .matches = {
                        DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"),
                }
index 49930ba..69a88dc 100644 (file)
@@ -163,11 +163,14 @@ struct atmel_i2s_gck_param {
 
 #define I2S_MCK_12M288         12288000UL
 #define I2S_MCK_11M2896                11289600UL
+#define I2S_MCK_6M144          6144000UL
 
 /* mck = (32 * (imckfs+1) / (imckdiv+1)) * fs */
 static const struct atmel_i2s_gck_param gck_params[] = {
+       /* mck = 6.144MHz */
+       {  8000, I2S_MCK_6M144,  1, 47},        /* mck =  768 fs */
+
        /* mck = 12.288MHz */
-       {  8000, I2S_MCK_12M288, 0, 47},        /* mck = 1536 fs */
        { 16000, I2S_MCK_12M288, 1, 47},        /* mck =  768 fs */
        { 24000, I2S_MCK_12M288, 3, 63},        /* mck =  512 fs */
        { 32000, I2S_MCK_12M288, 3, 47},        /* mck =  384 fs */
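Plugging the new row into the formula above checks out: with fs = 8000, imckdiv = 1 and imckfs = 47,

	mck = 32 * (47 + 1) / (1 + 1) * 8000 = 6,144,000 Hz = I2S_MCK_6M144 (768 * fs)

whereas the dropped row generated 12.288 MHz (1536 * fs) for the same sample rate.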
index 2a62dbd..c2de4ee 100644 (file)
@@ -715,6 +715,7 @@ config SND_SOC_CS35L41_I2C
 
 config SND_SOC_CS35L45
        tristate
+       select REGMAP_IRQ
 
 config SND_SOC_CS35L45_SPI
        tristate "Cirrus Logic CS35L45 CODEC (SPI)"
@@ -1942,6 +1943,7 @@ config SND_SOC_WCD934X
        tristate "WCD9340/WCD9341 Codec"
        depends on COMMON_CLK
        depends on SLIMBUS
+       select REGMAP_IRQ
        select REGMAP_SLIMBUS
        select SND_SOC_WCD_MBHC
        depends on MFD_WCD934X || COMPILE_TEST
index 6ac501f..8a879b6 100644 (file)
@@ -168,7 +168,7 @@ static int cs35l41_get_fs_mon_config_index(int freq)
 static const DECLARE_TLV_DB_RANGE(dig_vol_tlv,
                0, 0, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, 0, 1),
                1, 913, TLV_DB_MINMAX_ITEM(-10200, 1200));
-static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
+static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 50, 100, 0);
 
 static const struct snd_kcontrol_new dre_ctrl =
        SOC_DAPM_SINGLE("Switch", CS35L41_PWR_CTRL3, 20, 1, 0);
index ed2a419..40666e6 100644 (file)
@@ -62,10 +62,19 @@ static const struct i2c_device_id cs35l56_id_i2c[] = {
 };
 MODULE_DEVICE_TABLE(i2c, cs35l56_id_i2c);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+       { "CSC355C", 0 },
+       {},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
 static struct i2c_driver cs35l56_i2c_driver = {
        .driver = {
                .name           = "cs35l56",
                .pm = &cs35l56_pm_ops_i2c_spi,
+               .acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
        },
        .id_table       = cs35l56_id_i2c,
        .probe          = cs35l56_i2c_probe,
index 996aab1..302f9c4 100644 (file)
@@ -59,10 +59,19 @@ static const struct spi_device_id cs35l56_id_spi[] = {
 };
 MODULE_DEVICE_TABLE(spi, cs35l56_id_spi);
 
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+       { "CSC355C", 0 },
+       {},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
 static struct spi_driver cs35l56_spi_driver = {
        .driver = {
                .name           = "cs35l56",
                .pm = &cs35l56_pm_ops_i2c_spi,
+               .acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
        },
        .id_table       = cs35l56_id_spi,
        .probe          = cs35l56_spi_probe,
index c03f9d3..fd06b9f 100644 (file)
@@ -5,7 +5,6 @@
 // Copyright (C) 2023 Cirrus Logic, Inc. and
 //                    Cirrus Logic International Semiconductor Ltd.
 
-#include <linux/acpi.h>
 #include <linux/completion.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
@@ -1354,26 +1353,22 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56)
        return 0;
 }
 
-static int cs35l56_acpi_get_name(struct cs35l56_private *cs35l56)
+static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
 {
-       acpi_handle handle = ACPI_HANDLE(cs35l56->dev);
-       const char *sub;
+       struct device *dev = cs35l56->dev;
+       const char *prop;
+       int ret;
 
-       /* If there is no ACPI_HANDLE, there is no ACPI for this system, return 0 */
-       if (!handle)
+       ret = device_property_read_string(dev, "cirrus,firmware-uid", &prop);
+       /* If the sw node property is missing or bad, return 0 and fall back to the legacy firmware path */
+       if (ret < 0)
                return 0;
 
-       sub = acpi_get_subsystem_id(handle);
-       if (IS_ERR(sub)) {
-               /* If bad ACPI, return 0 and fallback to legacy firmware path, otherwise fail */
-               if (PTR_ERR(sub) == -ENODATA)
-                       return 0;
-               else
-                       return PTR_ERR(sub);
-       }
+       cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+       if (cs35l56->dsp.system_name == NULL)
+               return -ENOMEM;
 
-       cs35l56->dsp.system_name = sub;
-       dev_dbg(cs35l56->dev, "Subsystem ID: %s\n", cs35l56->dsp.system_name);
+       dev_dbg(dev, "Firmware UID: %s\n", cs35l56->dsp.system_name);
 
        return 0;
 }
@@ -1417,7 +1412,7 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
                gpiod_set_value_cansleep(cs35l56->reset_gpio, 1);
        }
 
-       ret = cs35l56_acpi_get_name(cs35l56);
+       ret = cs35l56_get_firmware_uid(cs35l56);
        if (ret != 0)
                goto err;
 
@@ -1604,8 +1599,6 @@ void cs35l56_remove(struct cs35l56_private *cs35l56)
 
        regcache_cache_only(cs35l56->regmap, true);
 
-       kfree(cs35l56->dsp.system_name);
-
        gpiod_set_value_cansleep(cs35l56->reset_gpio, 0);
        regulator_bulk_disable(ARRAY_SIZE(cs35l56->supplies), cs35l56->supplies);
 }
index b2106ff..e7db7bc 100644 (file)
@@ -19,6 +19,12 @@ static struct i2c_device_id cs42l51_i2c_id[] = {
 };
 MODULE_DEVICE_TABLE(i2c, cs42l51_i2c_id);
 
+const struct of_device_id cs42l51_of_match[] = {
+       { .compatible = "cirrus,cs42l51", },
+       { }
+};
+MODULE_DEVICE_TABLE(of, cs42l51_of_match);
+
 static int cs42l51_i2c_probe(struct i2c_client *i2c)
 {
        struct regmap_config config;
index a67cd3e..a7079ae 100644 (file)
@@ -823,13 +823,6 @@ int __maybe_unused cs42l51_resume(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(cs42l51_resume);
 
-const struct of_device_id cs42l51_of_match[] = {
-       { .compatible = "cirrus,cs42l51", },
-       { }
-};
-MODULE_DEVICE_TABLE(of, cs42l51_of_match);
-EXPORT_SYMBOL_GPL(cs42l51_of_match);
-
 MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>");
 MODULE_DESCRIPTION("Cirrus Logic CS42L51 ALSA SoC Codec Driver");
 MODULE_LICENSE("GPL");
index a79343e..125703e 100644 (file)
@@ -16,7 +16,6 @@ int cs42l51_probe(struct device *dev, struct regmap *regmap);
 void cs42l51_remove(struct device *dev);
 int __maybe_unused cs42l51_suspend(struct device *dev);
 int __maybe_unused cs42l51_resume(struct device *dev);
-extern const struct of_device_id cs42l51_of_match[];
 
 #define CS42L51_CHIP_ID                        0x1B
 #define CS42L51_CHIP_REV_A             0x00
index c65256b..581b334 100644 (file)
@@ -361,11 +361,15 @@ static irqreturn_t da7219_aad_irq_thread(int irq, void *data)
        struct da7219_priv *da7219 = snd_soc_component_get_drvdata(component);
        u8 events[DA7219_AAD_IRQ_REG_MAX];
        u8 statusa;
-       int i, report = 0, mask = 0;
+       int i, ret, report = 0, mask = 0;
 
        /* Read current IRQ events */
-       regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
-                        events, DA7219_AAD_IRQ_REG_MAX);
+       ret = regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
+                              events, DA7219_AAD_IRQ_REG_MAX);
+       if (ret) {
+               dev_warn_ratelimited(component->dev, "Failed to read IRQ events: %d\n", ret);
+               return IRQ_NONE;
+       }
 
        if (!events[DA7219_AAD_IRQ_REG_A] && !events[DA7219_AAD_IRQ_REG_B])
                return IRQ_NONE;
@@ -944,6 +948,8 @@ void da7219_aad_suspend(struct snd_soc_component *component)
                        }
                }
        }
+
+       synchronize_irq(da7219_aad->irq);
 }
 
 void da7219_aad_resume(struct snd_soc_component *component)
index 34cf607..65e497b 100644 (file)
@@ -153,7 +153,7 @@ static const char * const es8316_dmic_txt[] = {
                "dmic data at high level",
                "dmic data at low level",
 };
-static const unsigned int es8316_dmic_values[] = { 0, 1, 2 };
+static const unsigned int es8316_dmic_values[] = { 0, 2, 3 };
 static const struct soc_enum es8316_dmic_src_enum =
        SOC_VALUE_ENUM_SINGLE(ES8316_ADC_DMIC, 0, 3,
                              ARRAY_SIZE(es8316_dmic_txt),
index b5c69bb..2dfaf4f 100644 (file)
@@ -185,10 +185,10 @@ static int max98363_io_init(struct sdw_slave *slave)
        pm_runtime_get_noresume(dev);
 
        ret = regmap_read(max98363->regmap, MAX98363_R21FF_REV_ID, &reg);
-       if (!ret) {
+       if (!ret)
                dev_info(dev, "Revision ID: %X\n", reg);
-               return ret;
-       }
+       else
+               goto out;
 
        if (max98363->first_hw_init) {
                regcache_cache_bypass(max98363->regmap, false);
@@ -198,10 +198,11 @@ static int max98363_io_init(struct sdw_slave *slave)
        max98363->first_hw_init = true;
        max98363->hw_init = true;
 
+out:
        pm_runtime_mark_last_busy(dev);
        pm_runtime_put_autosuspend(dev);
 
-       return 0;
+       return ret;
 }
 
 #define MAX98363_RATES SNDRV_PCM_RATE_8000_192000
index 96d7588..ca6beb2 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/dmi.h>
 #include <linux/init.h>
 #include <linux/i2c.h>
 #include <linux/module.h>
 #include <sound/tlv.h>
 #include "nau8821.h"
 
+#define NAU8821_JD_ACTIVE_HIGH                 BIT(0)
+
+static int nau8821_quirk;
+static int quirk_override = -1;
+module_param_named(quirk, quirk_override, int, 0444);
+MODULE_PARM_DESC(quirk, "Board-specific quirk override");
+
 #define NAU_FREF_MAX 13500000
 #define NAU_FVCO_MAX 100000000
 #define NAU_FVCO_MIN 90000000
@@ -1792,6 +1800,33 @@ static int nau8821_setup_irq(struct nau8821 *nau8821)
        return 0;
 }
 
+/* Please keep this list alphabetically sorted */
+static const struct dmi_system_id nau8821_quirk_table[] = {
+       {
+               /* Positivo CW14Q01P-V2 */
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
+                       DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P-V2"),
+               },
+               .driver_data = (void *)(NAU8821_JD_ACTIVE_HIGH),
+       },
+       {}
+};
+
+static void nau8821_check_quirks(void)
+{
+       const struct dmi_system_id *dmi_id;
+
+       if (quirk_override != -1) {
+               nau8821_quirk = quirk_override;
+               return;
+       }
+
+       dmi_id = dmi_first_match(nau8821_quirk_table);
+       if (dmi_id)
+               nau8821_quirk = (unsigned long)dmi_id->driver_data;
+}
+
 static int nau8821_i2c_probe(struct i2c_client *i2c)
 {
        struct device *dev = &i2c->dev;
@@ -1812,6 +1847,12 @@ static int nau8821_i2c_probe(struct i2c_client *i2c)
 
        nau8821->dev = dev;
        nau8821->irq = i2c->irq;
+
+       nau8821_check_quirks();
+
+       if (nau8821_quirk & NAU8821_JD_ACTIVE_HIGH)
+               nau8821->jkdet_polarity = 0;
+
        nau8821_print_device_properties(nau8821);
 
        nau8821_reset_chip(nau8821->regmap);
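
The quirk plumbing above is the usual DMI pattern: a table of board matches carrying flag bits in .driver_data, plus a module parameter to override detection for testing. A hedged sketch of what one more entry would look like; the vendor and board strings here are invented placeholders:

        {
                /* Hypothetical board that also wires jack-detect active-high */
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
                        DMI_MATCH(DMI_BOARD_NAME, "EXAMPLE-BOARD-01"),
                },
                .driver_data = (void *)(NAU8821_JD_ACTIVE_HIGH),
        },

Since NAU8821_JD_ACTIVE_HIGH is BIT(0), loading the module with quirk=1 forces the same behaviour on any board, and quirk=0 disables all quirks regardless of any DMI match.
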
index f43520c..e566c8d 100644 (file)
@@ -52,6 +52,7 @@ static bool rt1308_volatile_register(struct device *dev, unsigned int reg)
        case 0x300a:
        case 0xc000:
        case 0xc710:
+       case 0xcf01:
        case 0xc860 ... 0xc863:
        case 0xc870 ... 0xc873:
                return true;
@@ -213,7 +214,7 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
 {
        struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(dev);
        int ret = 0;
-       unsigned int tmp;
+       unsigned int tmp, hibernation_flag;
 
        if (rt1308->hw_init)
                return 0;
@@ -242,6 +243,10 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
 
        pm_runtime_get_noresume(&slave->dev);
 
+       regmap_read(rt1308->regmap, 0xcf01, &hibernation_flag);
+       if ((hibernation_flag != 0x00) && rt1308->first_hw_init)
+               goto _preset_ready_;
+
        /* sw reset */
        regmap_write(rt1308->regmap, RT1308_SDW_RESET, 0);
 
@@ -282,6 +287,12 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
        regmap_write(rt1308->regmap, 0xc100, 0xd7);
        regmap_write(rt1308->regmap, 0xc101, 0xd7);
 
+       /* apply BQ params */
+       rt1308_apply_bq_params(rt1308);
+
+       regmap_write(rt1308->regmap, 0xcf01, 0x01);
+
+_preset_ready_:
        if (rt1308->first_hw_init) {
                regcache_cache_bypass(rt1308->regmap, false);
                regcache_mark_dirty(rt1308->regmap);
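
Register 0xcf01 serves here as a "presets already applied" scratch flag: when it reads back non-zero and the device has been through first_hw_init, the whole sw-reset-and-preset sequence can be skipped on re-initialization. The general shape of the pattern, with hypothetical register and helper names:

        unsigned int applied;

        regmap_read(priv->regmap, FOO_SCRATCH_REG, &applied);
        if (applied && priv->first_hw_init)
                goto presets_ready;                /* device kept its state */

        foo_apply_presets(priv);                   /* full (re)initialisation */
        regmap_write(priv->regmap, FOO_SCRATCH_REG, 0x01);

presets_ready:
        /* continue with regcache sync and runtime-PM bookkeeping */
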
index 0ed4fa2..eceed82 100644 (file)
@@ -53,7 +53,6 @@ static const struct reg_sequence init_list[] = {
        {RT5640_PR_BASE + 0x3d, 0x3600},
        {RT5640_PR_BASE + 0x12, 0x0aa8},
        {RT5640_PR_BASE + 0x14, 0x0aaa},
-       {RT5640_PR_BASE + 0x20, 0x6110},
        {RT5640_PR_BASE + 0x21, 0xe0e0},
        {RT5640_PR_BASE + 0x23, 0x1804},
 };
@@ -2567,9 +2566,10 @@ static void rt5640_enable_jack_detect(struct snd_soc_component *component,
        if (jack_data && jack_data->use_platform_clock)
                rt5640->use_platform_clock = jack_data->use_platform_clock;
 
-       ret = request_irq(rt5640->irq, rt5640_irq,
-                         IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
-                         "rt5640", rt5640);
+       ret = devm_request_threaded_irq(component->dev, rt5640->irq,
+                                       NULL, rt5640_irq,
+                                       IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+                                       "rt5640", rt5640);
        if (ret) {
                dev_warn(component->dev, "Failed to reguest IRQ %d: %d\n", rt5640->irq, ret);
                rt5640_disable_jack_detect(component);
@@ -2622,8 +2622,9 @@ static void rt5640_enable_hda_jack_detect(
 
        rt5640->jack = jack;
 
-       ret = request_irq(rt5640->irq, rt5640_irq,
-                         IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640);
+       ret = devm_request_threaded_irq(component->dev, rt5640->irq,
+                                       NULL, rt5640_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+                                       "rt5640", rt5640);
        if (ret) {
                dev_warn(component->dev, "Failed to reguest IRQ %d: %d\n", rt5640->irq, ret);
                rt5640->irq = -ENXIO;
index acc7fb1..a506d94 100644 (file)
@@ -3950,7 +3950,11 @@ static int rt5645_i2c_probe(struct i2c_client *i2c)
         * read and power On.
         */
        msleep(TIME_TO_POWER_MS);
-       regmap_read(regmap, RT5645_VENDOR_ID2, &val);
+       ret = regmap_read(regmap, RT5645_VENDOR_ID2, &val);
+       if (ret < 0) {
+               dev_err(&i2c->dev, "Failed to read: 0x%02X, ret = %d\n", RT5645_VENDOR_ID2, ret);
+               goto err_enable;
+       }
 
        switch (val) {
        case RT5645_DEVICE_ID:
index 83c367a..525713c 100644 (file)
@@ -4472,6 +4472,8 @@ static void rt5665_remove(struct snd_soc_component *component)
        struct rt5665_priv *rt5665 = snd_soc_component_get_drvdata(component);
 
        regmap_write(rt5665->regmap, RT5665_RESET, 0);
+
+       regulator_bulk_disable(ARRAY_SIZE(rt5665->supplies), rt5665->supplies);
 }
 
 #ifdef CONFIG_PM
index 67404f4..4968a8c 100644 (file)
@@ -750,8 +750,15 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
        if (!rt5682->first_hw_init)
                return 0;
 
-       if (!slave->unattach_request)
+       if (!slave->unattach_request) {
+               if (rt5682->disable_irq == true) {
+                       mutex_lock(&rt5682->disable_irq_lock);
+                       sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
+                       rt5682->disable_irq = false;
+                       mutex_unlock(&rt5682->disable_irq_lock);
+               }
                goto regmap_sync;
+       }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
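
This resume-path change, repeated almost verbatim for rt711, rt712 and rt722 below, re-arms the SoundWire implementation-defined interrupt mask when the codec re-attaches without a full unattach cycle: the wake was parked during suspend (tracked by disable_irq), and a plain regcache sync cannot restore a register written with sdw_write_no_pm(). Reduced to its essentials, with priv standing in for the per-codec struct:

        if (!slave->unattach_request) {
                mutex_lock(&priv->disable_irq_lock);
                if (priv->disable_irq) {
                        /* re-enable the in-band wake parked during suspend */
                        sdw_write_no_pm(slave, SDW_SCP_INTMASK1,
                                        SDW_SCP_INT1_IMPL_DEF);
                        priv->disable_irq = false;
                }
                mutex_unlock(&priv->disable_irq_lock);
                goto regmap_sync;
        }

The sketch takes the lock before testing the flag, which is slightly more defensive than strictly required on a resume path that cannot race with itself.
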
index 119e1f9..23f23f7 100644 (file)
@@ -438,8 +438,16 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
        if (!rt711->first_hw_init)
                return 0;
 
-       if (!slave->unattach_request)
+       if (!slave->unattach_request) {
+               if (rt711->disable_irq == true) {
+                       mutex_lock(&rt711->disable_irq_lock);
+                       sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
+                       sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+                       rt711->disable_irq = false;
+                       mutex_unlock(&rt711->disable_irq_lock);
+               }
                goto regmap_sync;
+       }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT711_PROBE_TIMEOUT));
index 87dafcb..33dced3 100644 (file)
@@ -538,8 +538,15 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
        if (!rt711->first_hw_init)
                return 0;
 
-       if (!slave->unattach_request)
+       if (!slave->unattach_request) {
+               if (rt711->disable_irq == true) {
+                       mutex_lock(&rt711->disable_irq_lock);
+                       sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
+                       rt711->disable_irq = false;
+                       mutex_unlock(&rt711->disable_irq_lock);
+               }
                goto regmap_sync;
+       }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT711_PROBE_TIMEOUT));
index ad06267..6bc5039 100644 (file)
@@ -438,8 +438,16 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
        if (!rt712->first_hw_init)
                return 0;
 
-       if (!slave->unattach_request)
+       if (!slave->unattach_request) {
+               if (rt712->disable_irq == true) {
+                       mutex_lock(&rt712->disable_irq_lock);
+                       sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
+                       sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+                       rt712->disable_irq = false;
+                       mutex_unlock(&rt712->disable_irq_lock);
+               }
                goto regmap_sync;
+       }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT712_PROBE_TIMEOUT));
index cc57e4e..e9103ff 100644 (file)
@@ -463,8 +463,16 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
        if (!rt722->first_hw_init)
                return 0;
 
-       if (!slave->unattach_request)
+       if (!slave->unattach_request) {
+               if (rt722->disable_irq == true) {
+                       mutex_lock(&rt722->disable_irq_lock);
+                       sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
+                       sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+                       rt722->disable_irq = false;
+                       mutex_unlock(&rt722->disable_irq_lock);
+               }
                goto regmap_sync;
+       }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT722_PROBE_TIMEOUT));
index a88c6c2..ffb26e4 100644 (file)
@@ -57,16 +57,17 @@ static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
 
                if (client->addr != tasdev->dev_addr) {
                        client->addr = tasdev->dev_addr;
-                       if (tasdev->cur_book == book) {
-                               ret = regmap_write(map,
-                                       TASDEVICE_PAGE_SELECT, 0);
-                               if (ret < 0) {
-                                       dev_err(tas_priv->dev, "%s, E=%d\n",
-                                               __func__, ret);
-                                       goto out;
-                               }
+                       /*
+                        * All tas2781s share the same regmap, so clear the page
+                        * cached inside the regmap when switching to another
+                        * tas2781. Register 0 on every page of every book is
+                        * the same page-select register.
+                        */
+                       ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
+                       if (ret < 0) {
+                               dev_err(tas_priv->dev, "%s, E=%d\n",
+                                       __func__, ret);
+                               goto out;
                        }
-                       goto out;
                }
 
                if (tasdev->cur_book != book) {
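
Background for the rewritten branch: tas2781 registers sit behind a book/page indirection in which register 0 of every page on every book is the page-select register, and several amplifiers share one regmap distinguished only by I2C address. Once the address changes, the page the regmap believes is selected no longer describes the newly addressed chip, so page 0 must be forced unconditionally. A hedged sketch of a full select sequence assuming the usual TI book/page scheme; FOO_BOOK_SELECT is an invented name:

        static int foo_select_book_page(struct regmap *map, int book, int page)
        {
                int ret;

                /* go to page 0 first: the book-select register lives there */
                ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
                if (ret < 0)
                        return ret;

                ret = regmap_write(map, FOO_BOOK_SELECT, book);
                if (ret < 0)
                        return ret;

                return regmap_write(map, TASDEVICE_PAGE_SELECT, page);
        }
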
index 1911750..5da1934 100644 (file)
@@ -1454,7 +1454,7 @@ struct wcd_mbhc *wcd_mbhc_init(struct snd_soc_component *component,
                return ERR_PTR(-EINVAL);
        }
 
-       mbhc = devm_kzalloc(dev, sizeof(*mbhc), GFP_KERNEL);
+       mbhc = kzalloc(sizeof(*mbhc), GFP_KERNEL);
        if (!mbhc)
                return ERR_PTR(-ENOMEM);
 
@@ -1474,61 +1474,76 @@ struct wcd_mbhc *wcd_mbhc_init(struct snd_soc_component *component,
 
        INIT_WORK(&mbhc->correct_plug_swch, wcd_correct_swch_plug);
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_sw_intr, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->mbhc_sw_intr, NULL,
                                        wcd_mbhc_mech_plug_detect_irq,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "mbhc sw intr", mbhc);
        if (ret)
-               goto err;
+               goto err_free_mbhc;
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_btn_press_intr, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->mbhc_btn_press_intr, NULL,
                                        wcd_mbhc_btn_press_handler,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "Button Press detect", mbhc);
        if (ret)
-               goto err;
+               goto err_free_sw_intr;
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_btn_release_intr, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->mbhc_btn_release_intr, NULL,
                                        wcd_mbhc_btn_release_handler,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "Button Release detect", mbhc);
        if (ret)
-               goto err;
+               goto err_free_btn_press_intr;
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_hs_ins_intr, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->mbhc_hs_ins_intr, NULL,
                                        wcd_mbhc_adc_hs_ins_irq,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "Elect Insert", mbhc);
        if (ret)
-               goto err;
+               goto err_free_btn_release_intr;
 
        disable_irq_nosync(mbhc->intr_ids->mbhc_hs_ins_intr);
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_hs_rem_intr, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->mbhc_hs_rem_intr, NULL,
                                        wcd_mbhc_adc_hs_rem_irq,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "Elect Remove", mbhc);
        if (ret)
-               goto err;
+               goto err_free_hs_ins_intr;
 
        disable_irq_nosync(mbhc->intr_ids->mbhc_hs_rem_intr);
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->hph_left_ocp, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->hph_left_ocp, NULL,
                                        wcd_mbhc_hphl_ocp_irq,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "HPH_L OCP detect", mbhc);
        if (ret)
-               goto err;
+               goto err_free_hs_rem_intr;
 
-       ret = devm_request_threaded_irq(dev, mbhc->intr_ids->hph_right_ocp, NULL,
+       ret = request_threaded_irq(mbhc->intr_ids->hph_right_ocp, NULL,
                                        wcd_mbhc_hphr_ocp_irq,
                                        IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                        "HPH_R OCP detect", mbhc);
        if (ret)
-               goto err;
+               goto err_free_hph_left_ocp;
 
        return mbhc;
-err:
+
+err_free_hph_left_ocp:
+       free_irq(mbhc->intr_ids->hph_left_ocp, mbhc);
+err_free_hs_rem_intr:
+       free_irq(mbhc->intr_ids->mbhc_hs_rem_intr, mbhc);
+err_free_hs_ins_intr:
+       free_irq(mbhc->intr_ids->mbhc_hs_ins_intr, mbhc);
+err_free_btn_release_intr:
+       free_irq(mbhc->intr_ids->mbhc_btn_release_intr, mbhc);
+err_free_btn_press_intr:
+       free_irq(mbhc->intr_ids->mbhc_btn_press_intr, mbhc);
+err_free_sw_intr:
+       free_irq(mbhc->intr_ids->mbhc_sw_intr, mbhc);
+err_free_mbhc:
+       kfree(mbhc);
+
        dev_err(dev, "Failed to request mbhc interrupts %d\n", ret);
 
        return ERR_PTR(ret);
@@ -1537,9 +1552,19 @@ EXPORT_SYMBOL(wcd_mbhc_init);
 
 void wcd_mbhc_deinit(struct wcd_mbhc *mbhc)
 {
+       free_irq(mbhc->intr_ids->hph_right_ocp, mbhc);
+       free_irq(mbhc->intr_ids->hph_left_ocp, mbhc);
+       free_irq(mbhc->intr_ids->mbhc_hs_rem_intr, mbhc);
+       free_irq(mbhc->intr_ids->mbhc_hs_ins_intr, mbhc);
+       free_irq(mbhc->intr_ids->mbhc_btn_release_intr, mbhc);
+       free_irq(mbhc->intr_ids->mbhc_btn_press_intr, mbhc);
+       free_irq(mbhc->intr_ids->mbhc_sw_intr, mbhc);
+
        mutex_lock(&mbhc->lock);
        wcd_cancel_hs_detect_plug(mbhc, &mbhc->correct_plug_swch);
        mutex_unlock(&mbhc->lock);
+
+       kfree(mbhc);
 }
 EXPORT_SYMBOL(wcd_mbhc_deinit);
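
The move away from devm_* here looks deliberate: wcd_mbhc_init() runs at component-probe time, which can happen more than once over the life of the parent device, so device-managed IRQs and memory would accumulate across rebinds. With manual management, the error unwind follows the standard kernel ladder (acquire in order, release in exact reverse order, one label per resource), which in miniature is:

        ret = request_threaded_irq(irq_a, NULL, handler_a,
                                   IRQF_ONESHOT, "a", ctx);
        if (ret)
                goto err_free_ctx;

        ret = request_threaded_irq(irq_b, NULL, handler_b,
                                   IRQF_ONESHOT, "b", ctx);
        if (ret)
                goto err_free_irq_a;

        return 0;

err_free_irq_a:
        free_irq(irq_a, ctx);
err_free_ctx:
        kfree(ctx);
        return ret;
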
 
index c0d1fa3..1b6e376 100644 (file)
@@ -2642,7 +2642,7 @@ static int wcd934x_mbhc_micb_ctrl_threshold_mic(struct snd_soc_component *compon
        return rc;
 }
 
-static inline void wcd934x_mbhc_get_result_params(struct wcd934x_codec *wcd934x,
+static void wcd934x_mbhc_get_result_params(struct wcd934x_codec *wcd934x,
                                                s16 *d1_a, u16 noff,
                                                int32_t *zdet)
 {
@@ -2683,7 +2683,7 @@ static inline void wcd934x_mbhc_get_result_params(struct wcd934x_codec *wcd934x,
        else if (x1 < minCode_param[noff])
                *zdet = WCD934X_ZDET_FLOATING_IMPEDANCE;
 
-       dev_info(wcd934x->dev, "%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n",
+       dev_dbg(wcd934x->dev, "%s: d1=%d, c1=%d, x1=0x%x, z_val=%d (milliohm)\n",
                __func__, d1, c1, x1, *zdet);
 ramp_down:
        i = 0;
@@ -2740,8 +2740,8 @@ z_right:
        *zr = zdet;
 }
 
-static inline void wcd934x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
-                                             int32_t *z_val, int flag_l_r)
+static void wcd934x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
+                                       int32_t *z_val, int flag_l_r)
 {
        s16 q1;
        int q1_cal;
@@ -3044,6 +3044,17 @@ static int wcd934x_mbhc_init(struct snd_soc_component *component)
 
        return 0;
 }
+
+static void wcd934x_mbhc_deinit(struct snd_soc_component *component)
+{
+       struct wcd934x_codec *wcd = snd_soc_component_get_drvdata(component);
+
+       if (!wcd->mbhc)
+               return;
+
+       wcd_mbhc_deinit(wcd->mbhc);
+}
+
 static int wcd934x_comp_probe(struct snd_soc_component *component)
 {
        struct wcd934x_codec *wcd = dev_get_drvdata(component->dev);
@@ -3077,6 +3088,7 @@ static void wcd934x_comp_remove(struct snd_soc_component *comp)
 {
        struct wcd934x_codec *wcd = dev_get_drvdata(comp->dev);
 
+       wcd934x_mbhc_deinit(comp);
        wcd_clsh_ctrl_free(wcd->clsh_ctrl);
 }
 
index e7d6a02..a3c6806 100644 (file)
@@ -210,7 +210,7 @@ struct wcd938x_priv {
 };
 
 static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
-static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(line_gain, 600, -3000);
+static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
 static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
 
 struct wcd938x_mbhc_zdet_param {
@@ -2124,10 +2124,11 @@ static int wcd938x_mbhc_micb_ctrl_threshold_mic(struct snd_soc_component *compon
        return wcd938x_mbhc_micb_adjust_voltage(component, micb_mv, MIC_BIAS_2);
 }
 
-static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
+static void wcd938x_mbhc_get_result_params(struct snd_soc_component *component,
                                                s16 *d1_a, u16 noff,
                                                int32_t *zdet)
 {
+       struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
        int i;
        int val, val1;
        s16 c1;
@@ -2154,8 +2155,8 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
                usleep_range(5000, 5050);
 
        if (!c1 || !x1) {
-               pr_err("%s: Impedance detect ramp error, c1=%d, x1=0x%x\n",
-                       __func__, c1, x1);
+               dev_err(component->dev, "Impedance detect ramp error, c1=%d, x1=0x%x\n",
+                       c1, x1);
                goto ramp_down;
        }
        d1 = d1_a[c1];
@@ -2165,7 +2166,7 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
        else if (x1 < minCode_param[noff])
                *zdet = WCD938X_ZDET_FLOATING_IMPEDANCE;
 
-       pr_err("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n",
+       dev_dbg(component->dev, "%s: d1=%d, c1=%d, x1=0x%x, z_val=%d (milliohm)\n",
                __func__, d1, c1, x1, *zdet);
 ramp_down:
        i = 0;
@@ -2210,7 +2211,7 @@ static void wcd938x_mbhc_zdet_ramp(struct snd_soc_component *component,
                           WCD938X_ANA_MBHC_ZDET, 0x80, 0x80);
        dev_dbg(component->dev, "%s: ramp for HPH_L, noff = %d\n",
                __func__, zdet_param->noff);
-       wcd938x_mbhc_get_result_params(wcd938x, d1_a, zdet_param->noff, &zdet);
+       wcd938x_mbhc_get_result_params(component, d1_a, zdet_param->noff, &zdet);
        regmap_update_bits(wcd938x->regmap,
                           WCD938X_ANA_MBHC_ZDET, 0x80, 0x00);
 
@@ -2224,15 +2225,15 @@ z_right:
                           WCD938X_ANA_MBHC_ZDET, 0x40, 0x40);
        dev_dbg(component->dev, "%s: ramp for HPH_R, noff = %d\n",
                __func__, zdet_param->noff);
-       wcd938x_mbhc_get_result_params(wcd938x, d1_a, zdet_param->noff, &zdet);
+       wcd938x_mbhc_get_result_params(component, d1_a, zdet_param->noff, &zdet);
        regmap_update_bits(wcd938x->regmap,
                           WCD938X_ANA_MBHC_ZDET, 0x40, 0x00);
 
        *zr = zdet;
 }
 
-static inline void wcd938x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
-                                             int32_t *z_val, int flag_l_r)
+static void wcd938x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
+                                       int32_t *z_val, int flag_l_r)
 {
        s16 q1;
        int q1_cal;
@@ -2625,6 +2626,8 @@ static int wcd938x_mbhc_init(struct snd_soc_component *component)
                                                     WCD938X_IRQ_HPHR_OCP_INT);
 
        wcd938x->wcd_mbhc = wcd_mbhc_init(component, &mbhc_cb, intr_ids, wcd_mbhc_fields, true);
+       if (IS_ERR(wcd938x->wcd_mbhc))
+               return PTR_ERR(wcd938x->wcd_mbhc);
 
        snd_soc_add_component_controls(component, impedance_detect_controls,
                                       ARRAY_SIZE(impedance_detect_controls));
@@ -2633,6 +2636,14 @@ static int wcd938x_mbhc_init(struct snd_soc_component *component)
 
        return 0;
 }
+
+static void wcd938x_mbhc_deinit(struct snd_soc_component *component)
+{
+       struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
+       wcd_mbhc_deinit(wcd938x->wcd_mbhc);
+}
+
 /* END MBHC */
 
 static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
@@ -2652,8 +2663,8 @@ static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
                       wcd938x_get_swr_port, wcd938x_set_swr_port),
        SOC_SINGLE_EXT("DSD_R Switch", WCD938X_DSD_R, 0, 1, 0,
                       wcd938x_get_swr_port, wcd938x_set_swr_port),
-       SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 0, line_gain),
-       SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 0, line_gain),
+       SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 1, line_gain),
+       SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 1, line_gain),
        WCD938X_EAR_PA_GAIN_TLV("EAR_PA Volume", WCD938X_ANA_EAR_COMPANDER_CTL,
                                2, 0x10, 0, ear_pa_gain),
        SOC_SINGLE_EXT("ADC1 Switch", WCD938X_ADC1, 1, 1, 0,
@@ -3080,16 +3091,33 @@ static int wcd938x_irq_init(struct wcd938x_priv *wcd, struct device *dev)
 static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
 {
        struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+       struct sdw_slave *tx_sdw_dev = wcd938x->tx_sdw_dev;
        struct device *dev = component->dev;
+       unsigned long time_left;
        int ret, i;
 
+       time_left = wait_for_completion_timeout(&tx_sdw_dev->initialization_complete,
+                                               msecs_to_jiffies(2000));
+       if (!time_left) {
+               dev_err(dev, "soundwire device init timeout\n");
+               return -ETIMEDOUT;
+       }
+
        snd_soc_component_init_regmap(component, wcd938x->regmap);
 
+       ret = pm_runtime_resume_and_get(dev);
+       if (ret < 0)
+               return ret;
+
        wcd938x->variant = snd_soc_component_read_field(component,
                                                 WCD938X_DIGITAL_EFUSE_REG_0,
                                                 WCD938X_ID_MASK);
 
        wcd938x->clsh_info = wcd_clsh_ctrl_alloc(component, WCD938X);
+       if (IS_ERR(wcd938x->clsh_info)) {
+               pm_runtime_put(dev);
+               return PTR_ERR(wcd938x->clsh_info);
+       }
 
        wcd938x_io_init(wcd938x);
        /* Set all interrupts as edge triggered */
@@ -3098,6 +3126,8 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
                             (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0);
        }
 
+       pm_runtime_put(dev);
+
        wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
                                                       WCD938X_IRQ_HPHR_PDM_WD_INT);
        wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
@@ -3109,20 +3139,26 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
        ret = request_threaded_irq(wcd938x->hphr_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
                                   IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                   "HPHR PDM WD INT", wcd938x);
-       if (ret)
+       if (ret) {
                dev_err(dev, "Failed to request HPHR WD interrupt (%d)\n", ret);
+               goto err_free_clsh_ctrl;
+       }
 
        ret = request_threaded_irq(wcd938x->hphl_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
                                   IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                   "HPHL PDM WD INT", wcd938x);
-       if (ret)
+       if (ret) {
                dev_err(dev, "Failed to request HPHL WD interrupt (%d)\n", ret);
+               goto err_free_hphr_pdm_wd_int;
+       }
 
        ret = request_threaded_irq(wcd938x->aux_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
                                   IRQF_ONESHOT | IRQF_TRIGGER_RISING,
                                   "AUX PDM WD INT", wcd938x);
-       if (ret)
+       if (ret) {
                dev_err(dev, "Failed to request Aux WD interrupt (%d)\n", ret);
+               goto err_free_hphl_pdm_wd_int;
+       }
 
        /* Disable watchdog interrupt for HPH and AUX */
        disable_irq_nosync(wcd938x->hphr_pdm_wd_int);
@@ -3137,7 +3173,7 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
                        dev_err(component->dev,
                                "%s: Failed to add snd ctrls for variant: %d\n",
                                __func__, wcd938x->variant);
-                       goto err;
+                       goto err_free_aux_pdm_wd_int;
                }
                break;
        case WCD9385:
@@ -3147,7 +3183,7 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
                        dev_err(component->dev,
                                "%s: Failed to add snd ctrls for variant: %d\n",
                                __func__, wcd938x->variant);
-                       goto err;
+                       goto err_free_aux_pdm_wd_int;
                }
                break;
        default:
@@ -3155,12 +3191,38 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
        }
 
        ret = wcd938x_mbhc_init(component);
-       if (ret)
+       if (ret) {
                dev_err(component->dev,  "mbhc initialization failed\n");
-err:
+               goto err_free_aux_pdm_wd_int;
+       }
+
+       return 0;
+
+err_free_aux_pdm_wd_int:
+       free_irq(wcd938x->aux_pdm_wd_int, wcd938x);
+err_free_hphl_pdm_wd_int:
+       free_irq(wcd938x->hphl_pdm_wd_int, wcd938x);
+err_free_hphr_pdm_wd_int:
+       free_irq(wcd938x->hphr_pdm_wd_int, wcd938x);
+err_free_clsh_ctrl:
+       wcd_clsh_ctrl_free(wcd938x->clsh_info);
+
        return ret;
 }
 
+static void wcd938x_soc_codec_remove(struct snd_soc_component *component)
+{
+       struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
+       wcd938x_mbhc_deinit(component);
+
+       free_irq(wcd938x->aux_pdm_wd_int, wcd938x);
+       free_irq(wcd938x->hphl_pdm_wd_int, wcd938x);
+       free_irq(wcd938x->hphr_pdm_wd_int, wcd938x);
+
+       wcd_clsh_ctrl_free(wcd938x->clsh_info);
+}
+
 static int wcd938x_codec_set_jack(struct snd_soc_component *comp,
                                  struct snd_soc_jack *jack, void *data)
 {
@@ -3177,6 +3239,7 @@ static int wcd938x_codec_set_jack(struct snd_soc_component *comp,
 static const struct snd_soc_component_driver soc_codec_dev_wcd938x = {
        .name = "wcd938x_codec",
        .probe = wcd938x_soc_codec_probe,
+       .remove = wcd938x_soc_codec_remove,
        .controls = wcd938x_snd_controls,
        .num_controls = ARRAY_SIZE(wcd938x_snd_controls),
        .dapm_widgets = wcd938x_dapm_widgets,
index 068e610..f2baee7 100644 (file)
@@ -2308,6 +2308,9 @@ static int wm8904_i2c_probe(struct i2c_client *i2c)
        regmap_update_bits(wm8904->regmap, WM8904_BIAS_CONTROL_0,
                            WM8904_POBCTRL, 0);
 
+       /* Fill the cache for the ADC test register */
+       regmap_read(wm8904->regmap, WM8904_ADC_TEST_0, &val);
+
        /* Can leave the device powered off until we need it */
        regcache_cache_only(wm8904->regmap, true);
        regulator_bulk_disable(ARRAY_SIZE(wm8904->supplies), wm8904->supplies);
index 3f08082..9d01225 100644 (file)
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 // Copyright 2018 NXP
 
 #include <linux/bitfield.h>
@@ -1254,4 +1254,4 @@ module_platform_driver(fsl_micfil_driver);
 
 MODULE_AUTHOR("Cosmin-Gabriel Samoila <cosmin.samoila@nxp.com>");
 MODULE_DESCRIPTION("NXP PDM Microphone Interface (MICFIL) driver");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("Dual BSD/GPL");
index 9237a1c..fee9fe3 100644 (file)
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
 /*
  * PDM Microphone Interface for the NXP i.MX SoC
  * Copyright 2018 NXP
index 5e09f63..f7676d3 100644 (file)
@@ -507,12 +507,6 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq)
                                   savediv / 2 - 1);
        }
 
-       if (sai->soc_data->max_register >= FSL_SAI_MCTL) {
-               /* SAI is in master mode at this point, so enable MCLK */
-               regmap_update_bits(sai->regmap, FSL_SAI_MCTL,
-                                  FSL_SAI_MCTL_MCLK_EN, FSL_SAI_MCTL_MCLK_EN);
-       }
-
        return 0;
 }
 
@@ -719,7 +713,7 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir)
        u32 xcsr, count = 100;
 
        regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs),
-                          FSL_SAI_CSR_TERE, 0);
+                          FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE, 0);
 
        /* TERE will remain set till the end of current frame */
        do {
index 8254c35..550df87 100644 (file)
@@ -91,6 +91,7 @@
 /* SAI Transmit/Receive Control Register */
 #define FSL_SAI_CSR_TERE       BIT(31)
 #define FSL_SAI_CSR_SE         BIT(30)
+#define FSL_SAI_CSR_BCE                BIT(28)
 #define FSL_SAI_CSR_FR         BIT(25)
 #define FSL_SAI_CSR_SR         BIT(24)
 #define FSL_SAI_CSR_xF_SHIFT   16
index 015c370..3fd26f2 100644 (file)
@@ -751,6 +751,8 @@ static int fsl_spdif_trigger(struct snd_pcm_substream *substream,
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                regmap_update_bits(regmap, REG_SPDIF_SCR, dmaen, 0);
                regmap_update_bits(regmap, REG_SPDIF_SIE, intr, 0);
+               regmap_write(regmap, REG_SPDIF_STL, 0x0);
+               regmap_write(regmap, REG_SPDIF_STR, 0x0);
                break;
        default:
                return -EINVAL;
index dbee8c9..0201029 100644 (file)
@@ -476,7 +476,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
                        DMI_MATCH(DMI_PRODUCT_NAME, "Lunar Lake Client Platform"),
                },
-               .driver_data = (void *)(RT711_JD2_100K),
+               .driver_data = (void *)(RT711_JD2),
        },
        {}
 };
index c4a16e4..ad130d9 100644 (file)
@@ -99,9 +99,9 @@ static int cs42l42_rtd_init(struct snd_soc_pcm_runtime *rtd)
        jack = &ctx->sdw_headset;
 
        snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE);
-       snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND);
-       snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP);
-       snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN);
+       snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOLUMEUP);
+       snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEDOWN);
+       snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOICECOMMAND);
 
        ret = snd_soc_component_set_jack(component, jack, NULL);
 
index 9883dc7..63333a2 100644 (file)
@@ -30,27 +30,32 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
                                        struct axg_tdm_stream *ts,
                                        unsigned int offset)
 {
-       unsigned int val, ch = ts->channels;
-       unsigned long mask;
-       int i, j;
+       unsigned int ch = ts->channels;
+       u32 val[AXG_TDM_NUM_LANES];
+       int i, j, k;
+
+       /*
+        * We need to mimic the slot distribution used by the HW to keep the
+        * channel placement consistent regardless of the number of channels
+        * in the stream. This is why the odd algorithm below is used.
+        */
+       memset(val, 0, sizeof(*val) * AXG_TDM_NUM_LANES);
 
        /*
         * Distribute the channels of the stream over the available slots
-        * of each TDM lane
+        * of each TDM lane. We need to go over the 32 slots ...
         */
-       for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
-               val = 0;
-               mask = ts->mask[i];
-
-               for (j = find_first_bit(&mask, 32);
-                    (j < 32) && ch;
-                    j = find_next_bit(&mask, 32, j + 1)) {
-                       val |= 1 << j;
-                       ch -= 1;
+       for (i = 0; (i < 32) && ch; i += 2) {
+               /* ... of all the lanes ... */
+               for (j = 0; j < AXG_TDM_NUM_LANES; j++) {
+                       /* ... then distribute the channels in pairs */
+                       for (k = 0; k < 2; k++) {
+                               if ((BIT(i + k) & ts->mask[j]) && ch) {
+                                       val[j] |= BIT(i + k);
+                                       ch -= 1;
+                               }
+                       }
                }
-
-               regmap_write(map, offset, val);
-               offset += regmap_get_reg_stride(map);
        }
 
        /*
@@ -63,6 +68,11 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
                return -EINVAL;
        }
 
+       for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
+               regmap_write(map, offset, val[i]);
+               offset += regmap_get_reg_stride(map);
+       }
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(axg_tdm_formatter_set_channel_masks);
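
A standalone userspace rendition of the loop makes the placement easy to check. With four channels and two lanes whose masks both allow slots 0..3, the pair-wise walk puts channels 0/1 on lane 0 and channels 2/3 on lane 1 (both lanes end with mask 0x3), rather than filling lane 0 completely first as the old find_first_bit() loop did:

        #include <stdint.h>
        #include <stdio.h>

        #define NUM_LANES 2

        int main(void)
        {
                uint32_t mask[NUM_LANES] = { 0xf, 0xf };        /* allowed slots */
                uint32_t val[NUM_LANES] = { 0 };                /* computed masks */
                int ch = 4;                                     /* stream channels */

                for (int i = 0; i < 32 && ch; i += 2)           /* slot pairs... */
                        for (int j = 0; j < NUM_LANES; j++)     /* ...across lanes */
                                for (int k = 0; k < 2; k++)
                                        if ((mask[j] & (1u << (i + k))) && ch) {
                                                val[j] |= 1u << (i + k);
                                                ch--;
                                        }

                printf("lane0=0x%x lane1=0x%x\n", val[0], val[1]);  /* 0x3 0x3 */
                return 0;
        }
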
index 31e0bad..dbff55a 100644 (file)
@@ -476,7 +476,7 @@ static int q6afe_mi2s_set_sysclk(struct snd_soc_dai *dai,
 
 static const struct snd_soc_dapm_route q6afe_dapm_routes[] = {
        {"HDMI Playback", NULL, "HDMI_RX"},
-       {"Display Port Playback", NULL, "DISPLAY_PORT_RX"},
+       {"DISPLAY_PORT_RX_0 Playback", NULL, "DISPLAY_PORT_RX"},
        {"Slimbus Playback", NULL, "SLIMBUS_0_RX"},
        {"Slimbus1 Playback", NULL, "SLIMBUS_1_RX"},
        {"Slimbus2 Playback", NULL, "SLIMBUS_2_RX"},
index 5eb0b86..c90db6d 100644 (file)
@@ -840,6 +840,7 @@ static const struct snd_soc_component_driver q6apm_fe_dai_component = {
        .pointer        = q6apm_dai_pointer,
        .trigger        = q6apm_dai_trigger,
        .compress_ops   = &q6apm_dai_compress_ops,
+       .use_dai_pcm_id = true,
 };
 
 static int q6apm_dai_probe(struct platform_device *pdev)
index 7bfac94..5d44d07 100644 (file)
@@ -511,6 +511,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
 
        switch (hdr->opcode) {
        case DATA_CMD_RSP_WR_SH_MEM_EP_DATA_BUFFER_DONE_V2:
+               if (!graph->ar_graph)
+                       break;
                client_event = APM_CLIENT_EVENT_DATA_WRITE_DONE;
                mutex_lock(&graph->lock);
                token = hdr->token & APM_WRITE_TOKEN_MASK;
@@ -544,6 +546,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
                wake_up(&graph->cmd_wait);
                break;
        case DATA_CMD_RSP_RD_SH_MEM_EP_DATA_BUFFER_V2:
+               if (!graph->ar_graph)
+                       break;
                client_event = APM_CLIENT_EVENT_DATA_READ_DONE;
                mutex_lock(&graph->lock);
                rd_done = data->payload;
@@ -649,8 +653,9 @@ int q6apm_graph_close(struct q6apm_graph *graph)
 {
        struct audioreach_graph *ar_graph = graph->ar_graph;
 
-       gpr_free_port(graph->port);
+       graph->ar_graph = NULL;
        kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph);
+       gpr_free_port(graph->port);
        kfree(graph);
 
        return 0;
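
Both halves of this q6apm fix address the same race: a DATA_CMD_RSP_* packet arriving while the graph is being torn down. Clearing graph->ar_graph first turns late callbacks into no-ops, and only then is the GPR port, the transport that delivers those callbacks, released. Annotated, the teardown order reads:

        graph->ar_graph = NULL;                 /* 1: callbacks now bail out early */
        kref_put(&ar_graph->refcount,           /* 2: drop the graph reference */
                 q6apm_put_audioreach_graph);
        gpr_free_port(graph->port);             /* 3: stop callback delivery */
        kfree(graph);                           /* 4: safe only after 1 and 3 */
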
index cccc59b..130b22a 100644 (file)
@@ -1277,8 +1277,8 @@ int audioreach_tplg_init(struct snd_soc_component *component)
 
        ret = snd_soc_tplg_component_load(component, &audioreach_tplg_ops, fw);
        if (ret < 0) {
-               dev_err(dev, "tplg component load failed%d\n", ret);
-               ret = -EINVAL;
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "tplg component load failed: %d\n", ret);
        }
 
        release_firmware(fw);
index 11bc525..1a0bde2 100644 (file)
@@ -1988,8 +1988,10 @@ static int snd_soc_bind_card(struct snd_soc_card *card)
        /* probe all components used by DAI links on this card */
        ret = soc_probe_link_components(card);
        if (ret < 0) {
-               dev_err(card->dev,
-                       "ASoC: failed to instantiate card %d\n", ret);
+               if (ret != -EPROBE_DEFER) {
+                       dev_err(card->dev,
+                               "ASoC: failed to instantiate card %d\n", ret);
+               }
                goto probe_end;
        }
 
index 8896227..3aa6b98 100644 (file)
@@ -38,6 +38,7 @@ static inline int _soc_pcm_ret(struct snd_soc_pcm_runtime *rtd,
        switch (ret) {
        case -EPROBE_DEFER:
        case -ENOTSUPP:
+       case -EINVAL:
                break;
        default:
                dev_err(rtd->dev,
@@ -2466,8 +2467,11 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream)
 
        /* there is no point preparing this FE if there are no BEs */
        if (list_empty(&fe->dpcm[stream].be_clients)) {
-               dev_err(fe->dev, "ASoC: no backend DAIs enabled for %s\n",
-                               fe->dai_link->name);
+               /* dev_err_once() for visibility, dev_dbg() for debugging UCM profiles */
+               dev_err_once(fe->dev, "ASoC: no backend DAIs enabled for %s, possibly missing ALSA mixer-based routing or UCM profile\n",
+                            fe->dai_link->name);
+               dev_dbg(fe->dev, "ASoC: no backend DAIs enabled for %s\n",
+                       fe->dai_link->name);
                ret = -EINVAL;
                goto out;
        }
index 8add361..ad08d4f 100644 (file)
@@ -1732,7 +1732,8 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg,
 
        ret = snd_soc_add_pcm_runtimes(tplg->comp->card, link, 1);
        if (ret < 0) {
-               dev_err(tplg->dev, "ASoC: adding FE link failed\n");
+               if (ret != -EPROBE_DEFER)
+                       dev_err(tplg->dev, "ASoC: adding FE link failed\n");
                goto err;
        }
 
@@ -2492,8 +2493,11 @@ static int soc_tplg_process_headers(struct soc_tplg *tplg)
                        /* load the header object */
                        ret = soc_tplg_load_header(tplg, hdr);
                        if (ret < 0) {
-                               dev_err(tplg->dev,
-                                       "ASoC: topology: could not load header: %d\n", ret);
+                               if (ret != -EPROBE_DEFER) {
+                                       dev_err(tplg->dev,
+                                               "ASoC: topology: could not load header: %d\n",
+                                               ret);
+                               }
                                return ret;
                        }
 
index 2ae76bc..afb5054 100644 (file)
@@ -217,6 +217,7 @@ int configure_and_run_sha_dma(struct acp_dev_data *adata, void *image_addr,
                              unsigned int image_length)
 {
        struct snd_sof_dev *sdev = adata->dev;
+       const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata);
        unsigned int tx_count, fw_qualifier, val;
        int ret;
 
@@ -251,9 +252,12 @@ int configure_and_run_sha_dma(struct acp_dev_data *adata, void *image_addr,
                return ret;
        }
 
-       ret = psp_send_cmd(adata, MBOX_ACP_SHA_DMA_COMMAND);
-       if (ret)
-               return ret;
+       /* psp_send_cmd only required for the renoir platform (rev 3) */
+       if (desc->rev == 3) {
+               ret = psp_send_cmd(adata, MBOX_ACP_SHA_DMA_COMMAND);
+               if (ret)
+                       return ret;
+       }
 
        ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, ACP_SHA_DSP_FW_QUALIFIER,
                                            fw_qualifier, fw_qualifier & DSP_FW_RUN_ENABLE,
index f351379..f33051e 100644 (file)
@@ -372,6 +372,7 @@ static const struct hda_dai_widget_dma_ops hda_ipc4_chain_dma_ops = {
 static int hda_ipc3_post_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai,
                                 struct snd_pcm_substream *substream, int cmd)
 {
+       struct hdac_ext_stream *hext_stream = hda_get_hext_stream(sdev, cpu_dai, substream);
        struct snd_soc_dapm_widget *w = snd_soc_dai_get_widget(cpu_dai, substream->stream);
 
        switch (cmd) {
@@ -379,9 +380,17 @@ static int hda_ipc3_post_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *c
        case SNDRV_PCM_TRIGGER_STOP:
        {
                struct snd_sof_dai_config_data data = { 0 };
+               int ret;
 
                data.dai_data = DMA_CHAN_INVALID;
-               return hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_HW_FREE, &data);
+               ret = hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_HW_FREE, &data);
+               if (ret < 0)
+                       return ret;
+
+               if (cmd == SNDRV_PCM_TRIGGER_STOP)
+                       return hda_link_dma_cleanup(substream, hext_stream, cpu_dai);
+
+               break;
        }
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                return hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_PAUSE, NULL);
index 3297dea..863865f 100644 (file)
@@ -107,9 +107,8 @@ hda_dai_get_ops(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai
        return sdai->platform_private;
 }
 
-static int hda_link_dma_cleanup(struct snd_pcm_substream *substream,
-                               struct hdac_ext_stream *hext_stream,
-                               struct snd_soc_dai *cpu_dai)
+int hda_link_dma_cleanup(struct snd_pcm_substream *substream, struct hdac_ext_stream *hext_stream,
+                        struct snd_soc_dai *cpu_dai)
 {
        const struct hda_dai_widget_dma_ops *ops = hda_dai_get_ops(substream, cpu_dai);
        struct sof_intel_hda_stream *hda_stream;
index 3f7c6fb..5b9e4eb 100644 (file)
@@ -963,5 +963,7 @@ const struct hda_dai_widget_dma_ops *
 hda_select_dai_widget_ops(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
 int hda_dai_config(struct snd_soc_dapm_widget *w, unsigned int flags,
                   struct snd_sof_dai_config_data *data);
+int hda_link_dma_cleanup(struct snd_pcm_substream *substream, struct hdac_ext_stream *hext_stream,
+                        struct snd_soc_dai *cpu_dai);
 
 #endif
index 1d3bca2..35da85a 100644 (file)
@@ -186,7 +186,6 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
        struct snd_sof_dfsentry *dfse = file->private_data;
        struct sof_ipc_trace_filter_elem *elems = NULL;
        struct snd_sof_dev *sdev = dfse->sdev;
-       loff_t pos = 0;
        int num_elems;
        char *string;
        int ret;
@@ -201,11 +200,11 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
        if (!string)
                return -ENOMEM;
 
-       /* assert null termination */
-       string[count] = 0;
-       ret = simple_write_to_buffer(string, count, &pos, from, count);
-       if (ret < 0)
+       if (copy_from_user(string, from, count)) {
+               ret = -EFAULT;
                goto error;
+       }
+       string[count] = '\0';
 
        ret = trace_filter_parse(sdev, string, &num_elems, &elems);
        if (ret < 0)
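
With a zeroed local pos, simple_write_to_buffer() degenerates to copy_from_user() plus bookkeeping, so the rewrite drops the indirection and reports -EFAULT directly. For what it's worth, the kernel also has a helper that collapses the allocate/copy/terminate triple into one call; assuming the earlier count bounds check stays, the function could plausibly shrink to:

        char *string;

        /* memdup_user_nul() = kmalloc(count + 1) + copy_from_user()
         * + the trailing '\0', returning an ERR_PTR on failure
         */
        string = memdup_user_nul(from, count);
        if (IS_ERR(string))
                return PTR_ERR(string);
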
index 2c5aac3..580960f 100644 (file)
@@ -1001,7 +1001,7 @@ void sof_ipc3_do_rx_work(struct snd_sof_dev *sdev, struct sof_ipc_cmd_hdr *hdr,
 
        ipc3_log_header(sdev->dev, "ipc rx", hdr->cmd);
 
-       if (hdr->size < sizeof(hdr) || hdr->size > SOF_IPC_MSG_MAX_SIZE) {
+       if (hdr->size < sizeof(*hdr) || hdr->size > SOF_IPC_MSG_MAX_SIZE) {
                dev_err(sdev->dev, "The received message size is invalid: %u\n",
                        hdr->size);
                return;
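
The one-character fix above is a classic C pitfall worth spelling out: hdr is a pointer, so sizeof(hdr) is the pointer size, which only coincidentally matches this 8-byte header on 64-bit builds; sizeof(*hdr) is what the validation intends:

        struct sof_ipc_cmd_hdr *hdr;

        sizeof(hdr);    /* size of the pointer itself: 4 or 8 bytes */
        sizeof(*hdr);   /* size of the structure it points to */
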
index 0c905bd..027416e 100644 (file)
@@ -708,6 +708,9 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
        struct snd_sof_pcm *spcm;
 
        spcm = snd_sof_find_spcm_dai(component, rtd);
+       if (!spcm)
+               return -EINVAL;
+
        time_info = spcm->stream[substream->stream].private;
        /* delay calculation is not supported by current fw_reg ABI */
        if (!time_info)
index a4e1a70..11361e1 100644 (file)
@@ -1731,6 +1731,9 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
 
        *ipc_config_size = ipc_size;
 
+       /* update pipeline memory usage */
+       sof_ipc4_update_resource_usage(sdev, swidget, &copier_data->base_config);
+
        /* copy IPC data */
        memcpy(*ipc_config_data, (void *)copier_data, sizeof(*copier_data));
        if (gtw_cfg_config_length)
@@ -1743,9 +1746,6 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
                       gtw_cfg_config_length,
                       &ipc4_copier->dma_config_tlv, dma_config_tlv_size);
 
-       /* update pipeline memory usage */
-       sof_ipc4_update_resource_usage(sdev, swidget, &copier_data->base_config);
-
        return 0;
 }
 
index bd0b10c..7d003f0 100644 (file)
@@ -2,7 +2,7 @@
 //
 // tegra210_adx.c - Tegra210 ADX driver
 //
-// Copyright (c) 2021 NVIDIA CORPORATION.  All rights reserved.
+// Copyright (c) 2021-2023 NVIDIA CORPORATION.  All rights reserved.
 
 #include <linux/clk.h>
 #include <linux/device.h>
@@ -175,10 +175,20 @@ static int tegra210_adx_get_byte_map(struct snd_kcontrol *kcontrol,
        mc = (struct soc_mixer_control *)kcontrol->private_value;
        enabled = adx->byte_mask[mc->reg / 32] & (1 << (mc->reg % 32));
 
+       /*
+        * TODO: Simplify this logic to just return from bytes_map[]
+        *
+        * Presently the logic below is required since bytes_map[] is
+        * tightly packed and cannot store the control value of 256.
+        * Byte mask state is used to know if 256 needs to be returned.
+        * Note that for control value of 256, the put() call stores 0
+        * in the bytes_map[] and disables the corresponding bit in
+        * byte_mask[].
+        */
        if (enabled)
                ucontrol->value.integer.value[0] = bytes_map[mc->reg];
        else
-               ucontrol->value.integer.value[0] = 0;
+               ucontrol->value.integer.value[0] = 256;
 
        return 0;
 }
@@ -192,19 +202,19 @@ static int tegra210_adx_put_byte_map(struct snd_kcontrol *kcontrol,
        int value = ucontrol->value.integer.value[0];
        struct soc_mixer_control *mc =
                (struct soc_mixer_control *)kcontrol->private_value;
+       unsigned int mask_val = adx->byte_mask[mc->reg / 32];
 
-       if (value == bytes_map[mc->reg])
+       if (value >= 0 && value <= 255)
+               mask_val |= (1 << (mc->reg % 32));
+       else
+               mask_val &= ~(1 << (mc->reg % 32));
+
+       if (mask_val == adx->byte_mask[mc->reg / 32])
                return 0;
 
-       if (value >= 0 && value <= 255) {
-               /* update byte map and enable slot */
-               bytes_map[mc->reg] = value;
-               adx->byte_mask[mc->reg / 32] |= (1 << (mc->reg % 32));
-       } else {
-               /* reset byte map and disable slot */
-               bytes_map[mc->reg] = 0;
-               adx->byte_mask[mc->reg / 32] &= ~(1 << (mc->reg % 32));
-       }
+       /* Update byte map and slot */
+       bytes_map[mc->reg] = value % 256;
+       adx->byte_mask[mc->reg / 32] = mask_val;
 
        return 1;
 }
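
The control contract encoded by this get/put pair (mirrored in the AMX below): values 0..255 map a byte into the slot and set its enable bit, while 256 is the "disabled" sentinel that cannot live in the tightly packed bytes_map[]. A worked example for a single slot, reg = 5, written as hypothetical pseudo-calls:

        put(kctl, 0x2a);        /* enable slot 5, bytes_map[5] = 0x2a; get() returns 42 */
        put(kctl, 256);         /* disable slot 5, bytes_map[5] = 0; get() returns 256 */
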
index 782a141..1798769 100644 (file)
@@ -2,7 +2,7 @@
 //
 // tegra210_amx.c - Tegra210 AMX driver
 //
-// Copyright (c) 2021 NVIDIA CORPORATION.  All rights reserved.
+// Copyright (c) 2021-2023 NVIDIA CORPORATION.  All rights reserved.
 
 #include <linux/clk.h>
 #include <linux/device.h>
@@ -203,10 +203,20 @@ static int tegra210_amx_get_byte_map(struct snd_kcontrol *kcontrol,
        else
                enabled = amx->byte_mask[0] & (1 << reg);
 
+       /*
+        * TODO: Simplify this logic to just return from bytes_map[]
+        *
+        * Presently the logic below is required since bytes_map[] is
+        * tightly packed and cannot store the control value of 256.
+        * Byte mask state is used to know if 256 needs to be returned.
+        * Note that for control value of 256, the put() call stores 0
+        * in the bytes_map[] and disables the corresponding bit in
+        * byte_mask[].
+        */
        if (enabled)
                ucontrol->value.integer.value[0] = bytes_map[reg];
        else
-               ucontrol->value.integer.value[0] = 0;
+               ucontrol->value.integer.value[0] = 256;
 
        return 0;
 }
@@ -221,25 +231,19 @@ static int tegra210_amx_put_byte_map(struct snd_kcontrol *kcontrol,
        unsigned char *bytes_map = (unsigned char *)&amx->map;
        int reg = mc->reg;
        int value = ucontrol->value.integer.value[0];
+       unsigned int mask_val = amx->byte_mask[reg / 32];
 
-       if (value == bytes_map[reg])
+       if (value >= 0 && value <= 255)
+               mask_val |= (1 << (reg % 32));
+       else
+               mask_val &= ~(1 << (reg % 32));
+
+       if (mask_val == amx->byte_mask[reg / 32])
                return 0;
 
-       if (value >= 0 && value <= 255) {
-               /* Update byte map and enable slot */
-               bytes_map[reg] = value;
-               if (reg > 31)
-                       amx->byte_mask[1] |= (1 << (reg - 32));
-               else
-                       amx->byte_mask[0] |= (1 << reg);
-       } else {
-               /* Reset byte map and disable slot */
-               bytes_map[reg] = 0;
-               if (reg > 31)
-                       amx->byte_mask[1] &= ~(1 << (reg - 32));
-               else
-                       amx->byte_mask[0] &= ~(1 << reg);
-       }
+       /* Update byte map and slot */
+       bytes_map[reg] = value % 256;
+       amx->byte_mask[reg / 32] = mask_val;
 
        return 1;
 }
index f4bd1e8..23260aa 100644 (file)
@@ -374,6 +374,15 @@ static const struct usbmix_name_map corsair_virtuoso_map[] = {
        { 0 }
 };
 
+/* Microsoft USB Link headset */
+/* a guess work: raw playback volume values are from 2 to 129 */
+static const struct usbmix_dB_map ms_usb_link_dB = { -3225, 0, true };
+static const struct usbmix_name_map ms_usb_link_map[] = {
+       { 9, NULL, .dB = &ms_usb_link_dB },
+       { 10, NULL }, /* Headset Capture volume; seems non-working, disabled */
+       { 0 }   /* terminator */
+};
+
 /* ASUS ROG Zenith II with Realtek ALC1220-VB */
 static const struct usbmix_name_map asus_zenith_ii_map[] = {
        { 19, NULL, 12 }, /* FU, Input Gain Pad - broken response, disabled */
@@ -668,6 +677,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .id = USB_ID(0x1395, 0x0025),
                .map = sennheiser_pc8_map,
        },
+       {
+               /* Microsoft USB Link headset */
+               .id = USB_ID(0x045e, 0x083c),
+               .map = ms_usb_link_map,
+       },
        { 0 } /* terminator */
 };
 
index efb4a33..5d72dc8 100644 (file)
@@ -4507,6 +4507,35 @@ YAMAHA_DEVICE(0x7010, "UB99"),
                }
        }
 },
+{
+       /* Advanced modes of the Mythware XA001AU.
+        * For the standard mode, Mythware XA001AU has ID ffad:a001
+        */
+       USB_DEVICE_VENDOR_SPEC(0xffad, 0xa001),
+       .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+               .vendor_name = "Mythware",
+               .product_name = "XA001AU",
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_COMPOSITE,
+               .data = (const struct snd_usb_audio_quirk[]) {
+                       {
+                               .ifnum = 0,
+                               .type = QUIRK_IGNORE_INTERFACE,
+                       },
+                       {
+                               .ifnum = 1,
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE,
+                       },
+                       {
+                               .ifnum = 2,
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE,
+                       },
+                       {
+                               .ifnum = -1
+                       }
+               }
+       }
+},
 
 #undef USB_DEVICE_VENDOR_SPEC
 #undef USB_AUDIO_DEVICE
index 30bcb80..598659d 100644 (file)
@@ -1876,8 +1876,10 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 
        /* XMOS based USB DACs */
        switch (chip->usb_id) {
-       case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */
-       case USB_ID(0x21ed, 0xd75a): /* Accuphase DAC-60 option card */
+       case USB_ID(0x139f, 0x5504): /* Nagra DAC */
+       case USB_ID(0x20b1, 0x3089): /* Mola-Mola DAC */
+       case USB_ID(0x2522, 0x0007): /* LH Labs Geek Out 1V5 */
+       case USB_ID(0x2522, 0x0009): /* LH Labs Geek Pulse X Inifinity 2V0 */
        case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */
        case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */
                if (fp->altsetting == 2)
@@ -1887,14 +1889,18 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
        case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */
        case USB_ID(0x10cb, 0x0103): /* The Bit Opus #3; with fp->dsd_raw */
        case USB_ID(0x16d0, 0x06b2): /* NuPrime DAC-10 */
-       case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */
+       case USB_ID(0x16d0, 0x06b4): /* NuPrime Audio HD-AVP/AVA */
        case USB_ID(0x16d0, 0x0733): /* Furutech ADL Stratos */
+       case USB_ID(0x16d0, 0x09d8): /* NuPrime IDA-8 */
        case USB_ID(0x16d0, 0x09db): /* NuPrime Audio DAC-9 */
+       case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */
        case USB_ID(0x1db5, 0x0003): /* Bryston BDA3 */
+       case USB_ID(0x20a0, 0x4143): /* WaveIO USB Audio 2.0 */
        case USB_ID(0x22e1, 0xca01): /* HDTA Serenade DSD */
        case USB_ID(0x249c, 0x9326): /* M2Tech Young MkIII */
        case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
        case USB_ID(0x2622, 0x0041): /* Audiolab M-DAC+ */
+       case USB_ID(0x278b, 0x5100): /* Rotel RC-1590 */
        case USB_ID(0x27f7, 0x3002): /* W4S DAC-2v2SE */
        case USB_ID(0x29a2, 0x0086): /* Mutec MC3+ USB */
        case USB_ID(0x6b42, 0x0042): /* MSB Technology */
@@ -1904,9 +1910,6 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 
        /* Amanero Combo384 USB based DACs with native DSD support */
        case USB_ID(0x16d0, 0x071a):  /* Amanero - Combo384 */
-       case USB_ID(0x2ab6, 0x0004):  /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */
-       case USB_ID(0x2ab6, 0x0005):  /* T+A USB HD Audio 1 */
-       case USB_ID(0x2ab6, 0x0006):  /* T+A USB HD Audio 2 */
                if (fp->altsetting == 2) {
                        switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
                        case 0x199:
@@ -2013,6 +2016,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_IGNORE_CTL_ERROR),
        DEVICE_FLG(0x041e, 0x4080, /* Creative Live Cam VF0610 */
                   QUIRK_FLAG_GET_SAMPLE_RATE),
+       DEVICE_FLG(0x045e, 0x083c, /* MS USB Link headset */
+                  QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_CTL_MSG_DELAY |
+                  QUIRK_FLAG_DISABLE_AUTOSUSPEND),
        DEVICE_FLG(0x046d, 0x084c, /* Logitech ConferenceCam Connect */
                   QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_CTL_MSG_DELAY_1M),
        DEVICE_FLG(0x046d, 0x0991, /* Logitech QuickCam Pro */
@@ -2048,6 +2054,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_IFACE_DELAY),
        DEVICE_FLG(0x0644, 0x805f, /* TEAC Model 12 */
                   QUIRK_FLAG_FORCE_IFACE_RESET),
+       DEVICE_FLG(0x0644, 0x806b, /* TEAC UD-701 */
+                  QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY |
+                  QUIRK_FLAG_IFACE_DELAY),
        DEVICE_FLG(0x06f8, 0xb000, /* Hercules DJ Console (Windows Edition) */
                   QUIRK_FLAG_IGNORE_CTL_ERROR),
        DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */
@@ -2086,6 +2095,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
        DEVICE_FLG(0x154e, 0x3006, /* Marantz SA-14S1 */
                   QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
+       DEVICE_FLG(0x154e, 0x300b, /* Marantz SA-KI RUBY / SA-12 */
+                  QUIRK_FLAG_DSD_RAW),
        DEVICE_FLG(0x154e, 0x500e, /* Denon DN-X1600 */
                   QUIRK_FLAG_IGNORE_CLOCK_SOURCE),
        DEVICE_FLG(0x1686, 0x00dd, /* Zoom R16/24 */
@@ -2130,6 +2141,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
        DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */
                   QUIRK_FLAG_GET_SAMPLE_RATE),
+       DEVICE_FLG(0x21b4, 0x0230, /* Ayre QB-9 Twenty */
+                  QUIRK_FLAG_DSD_RAW),
+       DEVICE_FLG(0x21b4, 0x0232, /* Ayre QX-5 Twenty */
+                  QUIRK_FLAG_DSD_RAW),
        DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */
                   QUIRK_FLAG_SET_IFACE_FIRST),
        DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */
@@ -2172,12 +2187,18 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_VALIDATE_RATES),
        VENDOR_FLG(0x1235, /* Focusrite Novation */
                   QUIRK_FLAG_VALIDATE_RATES),
+       VENDOR_FLG(0x1511, /* AURALiC */
+                  QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x152a, /* Thesycon devices */
                   QUIRK_FLAG_DSD_RAW),
+       VENDOR_FLG(0x18d1, /* iBasso devices */
+                  QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x1de7, /* Phoenix Audio */
                   QUIRK_FLAG_GET_SAMPLE_RATE),
        VENDOR_FLG(0x20b1, /* XMOS based devices */
                   QUIRK_FLAG_DSD_RAW),
+       VENDOR_FLG(0x21ed, /* Accuphase Laboratory */
+                  QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x22d9, /* Oppo */
                   QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x23ba, /* Playback Design */
@@ -2193,10 +2214,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
                   QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x2ab6, /* T+A devices */
                   QUIRK_FLAG_DSD_RAW),
+       VENDOR_FLG(0x2d87, /* Cayin device */
+                  QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x3336, /* HEM devices */
                   QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x3353, /* Khadas devices */
                   QUIRK_FLAG_DSD_RAW),
+       VENDOR_FLG(0x35f4, /* MSB Technology */
+                  QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0x3842, /* EVGA */
                   QUIRK_FLAG_DSD_RAW),
        VENDOR_FLG(0xc502, /* HiBy devices */
index 683ca3a..5f6f848 100644 (file)
 #define APPLE_CPU_PART_M1_FIRESTORM_MAX        0x029
 #define APPLE_CPU_PART_M2_BLIZZARD     0x032
 #define APPLE_CPU_PART_M2_AVALANCHE    0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO        0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX        0x039
 
 #define AMPERE_CPU_PART_AMPERE1                0xAC3
 
 #define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
 #define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
 #define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
 #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
 
 /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/tools/arch/arm64/include/uapi/asm/bitsperlong.h b/tools/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..485d60b
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
diff --git a/tools/arch/riscv/include/uapi/asm/bitsperlong.h b/tools/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644 (file)
index 0000000..0b9b58b
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
index cb8ca46..1f6d904 100644 (file)
@@ -14,7 +14,7 @@
  * Defines x86 CPU feature bits
  */
 #define NCAPINTS                       21         /* N 32-bit words worth of info */
-#define NBUGINTS                       1          /* N 32-bit bug flags */
+#define NBUGINTS                       2          /* N 32-bit bug flags */
 
 /*
  * Note: If the comment begins with a quoted string, that string is used
index 3aedae6..a00a53e 100644 (file)
 #define MSR_AMD64_DE_CFG               0xc0011029
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT   1
 #define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE      BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
 
 #define MSR_AMD64_BU_CFG2              0xc001102a
 #define MSR_AMD64_IBSFETCHCTL          0xc0011030
index bc48a4d..4798f9d 100644 (file)
@@ -26,3 +26,6 @@
 #ifndef __NR_setns
 #define __NR_setns 346
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 354
+#endif
index f70d2ca..d0f2043 100644 (file)
@@ -26,3 +26,6 @@
 #ifndef __NR_getcpu
 #define __NR_getcpu 309
 #endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 317
+#endif
index 0f0aa9b..2cd6dbb 100644 (file)
@@ -208,7 +208,7 @@ $(OUTPUT)test-libtraceevent.bin:
        $(BUILD) -ltraceevent
 
 $(OUTPUT)test-libtracefs.bin:
-       $(BUILD) -ltracefs
+        $(BUILD) $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null) -ltracefs
 
 $(OUTPUT)test-libcrypto.bin:
        $(BUILD) -lcrypto
index a0f4cab..b2c2946 100644 (file)
@@ -40,7 +40,8 @@ $(OUTPUT)counter_example: $(COUNTER_EXAMPLE)
 clean:
        rm -f $(ALL_PROGRAMS)
        rm -rf $(OUTPUT)include/linux/counter.h
-       rmdir -p $(OUTPUT)include/linux
+       rm -df $(OUTPUT)include/linux
+       rm -df $(OUTPUT)include
        find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
 install: $(ALL_PROGRAMS)
index e721290..4467979 100755 (executable)
@@ -164,7 +164,7 @@ def recursive_file_lookup(path, file_map):
 def get_all_devices_test_status(file_map):
 
         for device in file_map:
-                if (get_test_state(locate_state(device, file_map)) is 1):
+                if (get_test_state(locate_state(device, file_map)) == 1):
                         print("Testing = ON for: {}"
                               .format(device.split("/")[5]))
                 else:
@@ -203,7 +203,7 @@ def write_test_files(path, value):
 def set_test_state(state_path, state_value, quiet):
 
         write_test_files(state_path, state_value)
-        if (get_test_state(state_path) is 1):
+        if (get_test_state(state_path) == 1):
                 if (not quiet):
                         print("Testing = ON for device: {}"
                               .format(state_path.split("/")[5]))
index 9d36c8c..1684216 100644 (file)
 # define __always_inline       inline __attribute__((always_inline))
 #endif
 
+#ifndef __always_unused
+#define __always_unused __attribute__((__unused__))
+#endif
+
+#ifndef __noreturn
+#define __noreturn __attribute__((__noreturn__))
+#endif
+
+#ifndef unreachable
+#define unreachable() __builtin_unreachable()
+#endif
+
 #ifndef noinline
 #define noinline
 #endif
@@ -190,4 +202,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 #define ___PASTE(a, b) a##b
 #define __PASTE(a, b) ___PASTE(a, b)
 
+#ifndef OPTIMIZER_HIDE_VAR
+/* Make the optimizer believe the variable can be manipulated arbitrarily. */
+#define OPTIMIZER_HIDE_VAR(var)                                                \
+       __asm__ ("" : "=r" (var) : "0" (var))
+#endif
+
 #endif /* _TOOLS_LINUX_COMPILER_H */
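/*
 * Editor's sketch, not part of the patch: a minimal illustration of what
 * OPTIMIZER_HIDE_VAR() buys you. The empty asm makes the compiler assume the
 * value may have changed, so it cannot constant-fold or vectorize across it.
 * The function names here are hypothetical.
 */
static inline unsigned long hide(unsigned long v)
{
	OPTIMIZER_HIDE_VAR(v);		/* compiler must keep v "live" */
	return v;
}

static unsigned long bench_loop(unsigned long iters)
{
	unsigned long acc = 0;

	while (iters--)
		acc += hide(iters);	/* cannot collapse to a closed form */
	return acc;
}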
index dd7d8e1..fd6c1cb 100644 (file)
@@ -817,8 +817,11 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
 #define __NR_set_mempolicy_home_node 450
 __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
 
+#define __NR_cachestat 451
+__SYSCALL(__NR_cachestat, sys_cachestat)
+
 #undef __NR_syscalls
-#define __NR_syscalls 451
+#define __NR_syscalls 452
 
 /*
  * 32 bit systems traditionally used different
index dba7c5a..7000e59 100644 (file)
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_ENGINE_SEMA(class, instance) \
        __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
 
-#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+/*
+ * Top 4 bits of every non-engine counter are GT id.
+ */
+#define __I915_PMU_GT_SHIFT (60)
+
+#define ___I915_PMU_OTHER(gt, x) \
+       (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
+       ((__u64)(gt) << __I915_PMU_GT_SHIFT))
+
+#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
 
 #define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
 #define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
 
 #define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
 
+#define __I915_PMU_ACTUAL_FREQUENCY(gt)                ___I915_PMU_OTHER(gt, 0)
+#define __I915_PMU_REQUESTED_FREQUENCY(gt)     ___I915_PMU_OTHER(gt, 1)
+#define __I915_PMU_INTERRUPTS(gt)              ___I915_PMU_OTHER(gt, 2)
+#define __I915_PMU_RC6_RESIDENCY(gt)           ___I915_PMU_OTHER(gt, 3)
+#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt)  ___I915_PMU_OTHER(gt, 4)
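/*
 * Editor's sketch, not part of the patch: how the GT id reaches the kernel.
 * The macros above simply fold the GT into bits 63:60 of the perf config;
 * opening the counter is the usual uncore flow. Assumes i915_drm.h is in the
 * include path and that the dynamic PMU type was read from
 * /sys/bus/event_source/devices/i915/type.
 */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_gt_freq_counter(int i915_pmu_type, unsigned int gt)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = i915_pmu_type;
	attr.size = sizeof(attr);
	attr.config = __I915_PMU_ACTUAL_FREQUENCY(gt);	/* GT in bits 63:60 */

	/* uncore events are opened per cpu, not per task */
	return syscall(SYS_perf_event_open, &attr, -1, 0, -1, 0);
}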
+
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
 #define I915_NR_TEX_REGIONS 255        /* table size 2k - maximum due to use
@@ -659,7 +674,8 @@ typedef struct drm_i915_irq_wait {
  * If the IOCTL is successful, the returned parameter will be set to one of the
  * following values:
  *  * 0 if HuC firmware load is not complete,
- *  * 1 if HuC firmware is authenticated and running.
+ *  * 1 if HuC firmware is loaded and fully authenticated,
+ *  * 2 if HuC firmware is loaded and authenticated for clear media only
  */
 #define I915_PARAM_HUC_STATUS           42
 
@@ -771,6 +787,25 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
 
+/*
+ * Query the status of PXP support in i915.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ *     -ENODEV = PXP support is not available on the GPU device or in the
+ *               kernel due to missing component drivers or kernel configs.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of
+ * the following values:
+ *     1 = PXP feature is supported and is ready for use.
+ *     2 = PXP feature is supported but should be ready soon (pending
+ *         initialization of non-i915 system dependencies).
+ *
+ * NOTE: When param is supported (positive return values), user space should
+ *       still refer to the GEM PXP context-creation UAPI header specs to be
+ *       aware of possible failure due to system state machine at the time.
+ */
+#define I915_PARAM_PXP_STATUS           58
+
 /* Must be kept compact -- no holes and well documented */
 
 /**
@@ -2096,6 +2131,21 @@ struct drm_i915_gem_context_param {
  *
  * -ENODEV: feature not available
  * -EPERM: trying to mark a recoverable or not bannable context as protected
+ * -ENXIO: A dependency such as a component driver or firmware is not yet
+ *         loaded so user space may need to attempt again. Depending on the
+ *         device, this error may be reported if protected context creation is
+ *         attempted very early after kernel start because the internal timeout
+ *         waiting for such dependencies is not guaranteed to be larger than
+ *         required (numbers differ depending on system and kernel config):
+ *            - ADL/RPL: dependencies may take up to 3 seconds from kernel start
+ *                       while context creation internal timeout is 250 milliseconds
+ *            - MTL: dependencies may take up to 8 seconds from kernel start
+ *                   while context creation internal timeout is 250 milliseconds
+ *         NOTE: such dependencies happen once, so a subsequent call to create a
+ *         protected context after a prior successful call will not experience
+ *         such timeouts and will not return -ENXIO (unless the driver is reloaded,
+ *         or, depending on the device, resumes from a suspended state).
+ * -EIO: The firmware did not succeed in creating the protected context.
  */
 #define I915_CONTEXT_PARAM_PROTECTED_CONTENT    0xd
 /* Must be kept compact -- no holes and well documented */
@@ -3630,9 +3680,13 @@ struct drm_i915_gem_create_ext {
         *
         * For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
         * struct drm_i915_gem_create_ext_protected_content.
+        *
+        * For I915_GEM_CREATE_EXT_SET_PAT usage see
+        * struct drm_i915_gem_create_ext_set_pat.
         */
 #define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
 #define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
        __u64 extensions;
 };
 
@@ -3747,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
        __u32 flags;
 };
 
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ *      struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ *              .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ *              .pat_index = 0,
+ *      };
+ *      struct drm_i915_gem_create_ext create_ext = {
+ *              .size = PAGE_SIZE,
+ *              .extensions = (uintptr_t)&set_pat_ext,
+ *      };
+ *
+ *      int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ *      if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+       /** @base: Extension link. See struct i915_user_extension. */
+       struct i915_user_extension base;
+       /**
+        * @pat_index: PAT index to be set
+        * PAT index is a bit field in Page Table Entry to control caching
+        * behaviors for GPU accesses. The definition of PAT index is
+        * platform dependent and can be found in hardware specifications,
+        */
+       __u32 pat_index;
+       /** @rsvd: reserved for future use */
+       __u32 rsvd;
+};
+
 /* ID of the protected content session managed by i915 when PXP is active */
 #define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
 
index e8c07da..6c80f96 100644 (file)
 
 #define AT_RECURSIVE           0x8000  /* Apply to the entire subtree */
 
+/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+#define AT_HANDLE_FID          AT_REMOVEDIR    /* file handle is needed to
+                                       compare object identity and may not
+                                       be usable with open_by_handle_at(2) */
+
 #endif /* _UAPI_LINUX_FCNTL_H */
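/*
 * Editor's sketch, not part of the patch: with AT_HANDLE_FID the returned
 * handle is only promised to be comparable (same object <=> same bytes), not
 * openable with open_by_handle_at(2). Assumes the glibc name_to_handle_at(2)
 * wrapper and headers new enough to carry the flag.
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>

#ifndef MAX_HANDLE_SZ
#define MAX_HANDLE_SZ 128	/* from linux/fcntl.h */
#endif

static struct file_handle *get_identity_handle(const char *path)
{
	struct file_handle *fh = calloc(1, sizeof(*fh) + MAX_HANDLE_SZ);
	int mount_id;

	if (!fh)
		return NULL;
	fh->handle_bytes = MAX_HANDLE_SZ;
	if (name_to_handle_at(AT_FDCWD, path, fh, &mount_id, AT_HANDLE_FID)) {
		free(fh);
		return NULL;
	}
	return fh;	/* compare handle_type and f_handle bytes only */
}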
index 737318b..f089ab2 100644 (file)
@@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
 #define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
 #define KVM_CAP_COUNTER_OFFSET 227
+#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
+#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1442,6 +1444,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_XIVE              KVM_DEV_TYPE_XIVE
        KVM_DEV_TYPE_ARM_PV_TIME,
 #define KVM_DEV_TYPE_ARM_PV_TIME       KVM_DEV_TYPE_ARM_PV_TIME
+       KVM_DEV_TYPE_RISCV_AIA,
+#define KVM_DEV_TYPE_RISCV_AIA         KVM_DEV_TYPE_RISCV_AIA
        KVM_DEV_TYPE_MAX,
 };
 
@@ -1613,7 +1617,7 @@ struct kvm_s390_ucas_mapping {
 #define KVM_GET_DEBUGREGS         _IOR(KVMIO,  0xa1, struct kvm_debugregs)
 #define KVM_SET_DEBUGREGS         _IOW(KVMIO,  0xa2, struct kvm_debugregs)
 /*
- * vcpu version available with KVM_ENABLE_CAP
+ * vcpu version available with KVM_CAP_ENABLE_CAP
  * vm version available with KVM_CAP_ENABLE_CAP_VM
  */
 #define KVM_ENABLE_CAP            _IOW(KVMIO,  0xa3, struct kvm_enable_cap)
index f55bc68..a246e11 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
+#include <linux/types.h>
 
 #define MREMAP_MAYMOVE         1
 #define MREMAP_FIXED           2
 #define MAP_HUGE_2GB   HUGETLB_FLAG_ENCODE_2GB
 #define MAP_HUGE_16GB  HUGETLB_FLAG_ENCODE_16GB
 
+struct cachestat_range {
+       __u64 off;
+       __u64 len;
+};
+
+struct cachestat {
+       __u64 nr_cache;
+       __u64 nr_dirty;
+       __u64 nr_writeback;
+       __u64 nr_evicted;
+       __u64 nr_recently_evicted;
+};
+
 #endif /* _UAPI_LINUX_MMAN_H */
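/*
 * Editor's sketch, not part of the patch: exercising the new structures with
 * a raw syscall, since libc wrappers may not exist yet. Syscall number 451
 * matches the tables added elsewhere in this merge; a zero-length range is
 * assumed to mean "from off to end of file".
 */
#include <linux/mman.h>		/* the structs added above */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static void print_cachestat(int fd)
{
	struct cachestat_range range = { .off = 0, .len = 0 };
	struct cachestat cs;

	if (syscall(__NR_cachestat, fd, &range, &cs, 0) == 0)
		printf("cache=%llu dirty=%llu writeback=%llu\n",
		       (unsigned long long)cs.nr_cache,
		       (unsigned long long)cs.nr_dirty,
		       (unsigned long long)cs.nr_writeback);
}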
index 4d93967..8eb0d7b 100644 (file)
@@ -74,7 +74,8 @@
 #define MOVE_MOUNT_T_AUTOMOUNTS                0x00000020 /* Follow automounts on to path */
 #define MOVE_MOUNT_T_EMPTY_PATH                0x00000040 /* Empty to path permitted */
 #define MOVE_MOUNT_SET_GROUP           0x00000100 /* Set sharing group instead */
-#define MOVE_MOUNT__MASK               0x00000177
+#define MOVE_MOUNT_BENEATH             0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK               0x00000377
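/*
 * Editor's sketch, not part of the patch: one plausible use of the new flag,
 * sliding a prepared mount underneath the current top mount so the old one
 * can later be detached without a window where nothing is mounted. Raw
 * syscall because glibc may not wrap move_mount(2); the paths are made up.
 */
#include <fcntl.h>		/* AT_FDCWD */
#include <linux/mount.h>	/* MOVE_MOUNT_BENEATH, per the hunk above */
#include <sys/syscall.h>
#include <unistd.h>

static int mount_beneath(const char *from, const char *to)
{
	return syscall(SYS_move_mount, AT_FDCWD, from, AT_FDCWD, to,
		       MOVE_MOUNT_BENEATH);
}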
 
 /*
  * fsopen() flags.
index f23d9a1..3c36aea 100644 (file)
@@ -294,4 +294,15 @@ struct prctl_mm_map {
 
 #define PR_SET_MEMORY_MERGE            67
 #define PR_GET_MEMORY_MERGE            68
+
+#define PR_RISCV_V_SET_CONTROL         69
+#define PR_RISCV_V_GET_CONTROL         70
+# define PR_RISCV_V_VSTATE_CTRL_DEFAULT                0
+# define PR_RISCV_V_VSTATE_CTRL_OFF            1
+# define PR_RISCV_V_VSTATE_CTRL_ON             2
+# define PR_RISCV_V_VSTATE_CTRL_INHERIT                (1 << 4)
+# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK       0x3
+# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK      0xc
+# define PR_RISCV_V_VSTATE_CTRL_MASK           0x1f
+
 #endif /* _LINUX_PRCTL_H */
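/*
 * Editor's sketch, not part of the patch: driving the new control from a
 * runtime. Per the masks above, bits 1:0 hold the current task's state and
 * bit 4 makes children inherit it; combining ON with INHERIT is an assumed
 * typical use, not mandated by the header.
 */
#include <sys/prctl.h>		/* assumes headers carrying the new defines */

static long enable_riscv_vector(void)
{
	unsigned long ctrl = PR_RISCV_V_VSTATE_CTRL_ON |
			     PR_RISCV_V_VSTATE_CTRL_INHERIT;

	if (prctl(PR_RISCV_V_SET_CONTROL, ctrl, 0, 0, 0))
		return -1;
	return prctl(PR_RISCV_V_GET_CONTROL, 0, 0, 0, 0);
}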
index 92e1b70..f5c48b6 100644 (file)
 #define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
 /* Specify an eventfd file descriptor to signal on log write. */
 #define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to 1 or more of its virtqueues using
+ * the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit caller's cgroups and namespaces,
+ * and will share the caller's memory space. The new thread will also be
+ * counted against the caller's RLIMIT_NPROC value.
+ *
+ * The worker's ID used in other commands will be returned in
+ * vhost_worker_state.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
+ * virtqueue. If userspace is not able to call this for workers it has created,
+ * the kernel will free all the device's workers when the device is closed.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
 
 /* Ring setup. */
 /* Set number of descriptors in ring. This parameter can not
 #define VHOST_VRING_BIG_ENDIAN 1
 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
 #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues.
+ *
+ * This will replace the virtqueue's existing worker. If the replaced worker
+ * is no longer attached to any virtqueues, it can be freed with
+ * VHOST_FREE_WORKER.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15,             \
+                                      struct vhost_vring_worker)
+/* Return the vring worker's ID */
+#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16,               \
+                                    struct vhost_vring_worker)
 
 /* The following ioctls use eventfd file descriptors to signal and poll
  * for events. */
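/*
 * Editor's sketch, not part of the patch: the worker lifecycle the comments
 * above describe, assuming the vhost_worker_state and vhost_vring_worker
 * layouts from vhost_types.h. Must run after VHOST_SET_OWNER.
 */
#include <linux/vhost.h>
#include <sys/ioctl.h>

static int give_vq_its_own_worker(int vhost_fd, unsigned int vq_index)
{
	struct vhost_worker_state w = {};
	struct vhost_vring_worker vw = {};

	if (ioctl(vhost_fd, VHOST_NEW_WORKER, &w))	/* id returned in w */
		return -1;

	vw.index = vq_index;
	vw.worker_id = w.worker_id;
	if (ioctl(vhost_fd, VHOST_ATTACH_VRING_WORKER, &vw)) {
		ioctl(vhost_fd, VHOST_FREE_WORKER, &w);	/* still unattached */
		return -1;
	}
	return 0;
}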
index 0aa955a..f9939da 100644 (file)
@@ -274,6 +274,7 @@ typedef int __bitwise snd_pcm_subformat_t;
 #define SNDRV_PCM_INFO_DOUBLE          0x00000004      /* Double buffering needed for PCM start/stop */
 #define SNDRV_PCM_INFO_BATCH           0x00000010      /* double buffering */
 #define SNDRV_PCM_INFO_SYNC_APPLPTR    0x00000020      /* need the explicit sync of appl_ptr update */
+#define SNDRV_PCM_INFO_PERFECT_DRAIN   0x00000040      /* silencing at the end of stream is not required */
 #define SNDRV_PCM_INFO_INTERLEAVED     0x00000100      /* channels are interleaved */
 #define SNDRV_PCM_INFO_NONINTERLEAVED  0x00000200      /* channels are not interleaved */
 #define SNDRV_PCM_INFO_COMPLEX         0x00000400      /* complex frame organization (mmap only) */
@@ -383,6 +384,9 @@ typedef int snd_pcm_hw_param_t;
 #define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0)  /* avoid rate resampling */
 #define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER      (1<<1)  /* export buffer */
 #define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP   (1<<2)  /* disable period wakeups */
+#define SNDRV_PCM_HW_PARAMS_NO_DRAIN_SILENCE   (1<<3)  /* suppress the filling of
+                                                        * silence samples during drain
+                                                        */
 
 struct snd_interval {
        unsigned int min, max;
@@ -708,7 +712,7 @@ enum {
  *  Raw MIDI section - /dev/snd/midi??
  */
 
-#define SNDRV_RAWMIDI_VERSION          SNDRV_PROTOCOL_VERSION(2, 0, 2)
+#define SNDRV_RAWMIDI_VERSION          SNDRV_PROTOCOL_VERSION(2, 0, 4)
 
 enum {
        SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -719,6 +723,7 @@ enum {
 #define SNDRV_RAWMIDI_INFO_OUTPUT              0x00000001
 #define SNDRV_RAWMIDI_INFO_INPUT               0x00000002
 #define SNDRV_RAWMIDI_INFO_DUPLEX              0x00000004
+#define SNDRV_RAWMIDI_INFO_UMP                 0x00000008
 
 struct snd_rawmidi_info {
        unsigned int device;            /* RO/WR (control): device number */
@@ -779,6 +784,72 @@ struct snd_rawmidi_status {
 };
 #endif
 
+/* UMP EP info flags */
+#define SNDRV_UMP_EP_INFO_STATIC_BLOCKS                0x01
+
+/* UMP EP Protocol / JRTS capability bits */
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI_MASK      0x0300
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI1          0x0100 /* MIDI 1.0 */
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI2          0x0200 /* MIDI 2.0 */
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_MASK      0x0003
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_TX                0x0001 /* JRTS Transmit */
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_RX                0x0002 /* JRTS Receive */
+
+/* UMP Endpoint information */
+struct snd_ump_endpoint_info {
+       int card;                       /* card number */
+       int device;                     /* device number */
+       unsigned int flags;             /* additional info */
+       unsigned int protocol_caps;     /* protocol capabilities */
+       unsigned int protocol;          /* current protocol */
+       unsigned int num_blocks;        /* # of function blocks */
+       unsigned short version;         /* UMP major/minor version */
+       unsigned short family_id;       /* MIDI device family ID */
+       unsigned short model_id;        /* MIDI family model ID */
+       unsigned int manufacturer_id;   /* MIDI manufacturer ID */
+       unsigned char sw_revision[4];   /* software revision */
+       unsigned short padding;
+       unsigned char name[128];        /* endpoint name string */
+       unsigned char product_id[128];  /* unique product id string */
+       unsigned char reserved[32];
+} __packed;
+
+/* UMP direction */
+#define SNDRV_UMP_DIR_INPUT            0x01
+#define SNDRV_UMP_DIR_OUTPUT           0x02
+#define SNDRV_UMP_DIR_BIDIRECTION      0x03
+
+/* UMP block info flags */
+#define SNDRV_UMP_BLOCK_IS_MIDI1       (1U << 0) /* MIDI 1.0 port w/o restrict */
+#define SNDRV_UMP_BLOCK_IS_LOWSPEED    (1U << 1) /* 31.25Kbps B/W MIDI1 port */
+
+/* UMP block user-interface hint */
+#define SNDRV_UMP_BLOCK_UI_HINT_UNKNOWN                0x00
+#define SNDRV_UMP_BLOCK_UI_HINT_RECEIVER       0x01
+#define SNDRV_UMP_BLOCK_UI_HINT_SENDER         0x02
+#define SNDRV_UMP_BLOCK_UI_HINT_BOTH           0x03
+
+/* UMP groups and blocks */
+#define SNDRV_UMP_MAX_GROUPS           16
+#define SNDRV_UMP_MAX_BLOCKS           32
+
+/* UMP Block information */
+struct snd_ump_block_info {
+       int card;                       /* card number */
+       int device;                     /* device number */
+       unsigned char block_id;         /* block ID (R/W) */
+       unsigned char direction;        /* UMP direction */
+       unsigned char active;           /* Activeness */
+       unsigned char first_group;      /* first group ID */
+       unsigned char num_groups;       /* number of groups */
+       unsigned char midi_ci_version;  /* MIDI-CI support version */
+       unsigned char sysex8_streams;   /* max number of sysex8 streams */
+       unsigned char ui_hint;          /* user interface hint */
+       unsigned int flags;             /* various info flags */
+       unsigned char name[128];        /* block name string */
+       unsigned char reserved[32];
+} __packed;
+
 #define SNDRV_RAWMIDI_IOCTL_PVERSION   _IOR('W', 0x00, int)
 #define SNDRV_RAWMIDI_IOCTL_INFO       _IOR('W', 0x01, struct snd_rawmidi_info)
 #define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int)
@@ -786,6 +857,9 @@ struct snd_rawmidi_status {
 #define SNDRV_RAWMIDI_IOCTL_STATUS     _IOWR('W', 0x20, struct snd_rawmidi_status)
 #define SNDRV_RAWMIDI_IOCTL_DROP       _IOW('W', 0x30, int)
 #define SNDRV_RAWMIDI_IOCTL_DRAIN      _IOW('W', 0x31, int)
+/* Additional ioctls for UMP rawmidi devices */
+#define SNDRV_UMP_IOCTL_ENDPOINT_INFO  _IOR('W', 0x40, struct snd_ump_endpoint_info)
+#define SNDRV_UMP_IOCTL_BLOCK_INFO     _IOR('W', 0x41, struct snd_ump_block_info)
 
 /*
  *  Timer section - /dev/snd/timer
@@ -961,7 +1035,7 @@ struct snd_timer_tread {
  *                                                                          *
  ****************************************************************************/
 
-#define SNDRV_CTL_VERSION              SNDRV_PROTOCOL_VERSION(2, 0, 8)
+#define SNDRV_CTL_VERSION              SNDRV_PROTOCOL_VERSION(2, 0, 9)
 
 struct snd_ctl_card_info {
        int card;                       /* card number */
@@ -1122,6 +1196,9 @@ struct snd_ctl_tlv {
 #define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int)
 #define SNDRV_CTL_IOCTL_RAWMIDI_INFO   _IOWR('U', 0x41, struct snd_rawmidi_info)
 #define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int)
+#define SNDRV_CTL_IOCTL_UMP_NEXT_DEVICE        _IOWR('U', 0x43, int)
+#define SNDRV_CTL_IOCTL_UMP_ENDPOINT_INFO _IOWR('U', 0x44, struct snd_ump_endpoint_info)
+#define SNDRV_CTL_IOCTL_UMP_BLOCK_INFO _IOWR('U', 0x45, struct snd_ump_block_info)
 #define SNDRV_CTL_IOCTL_POWER          _IOWR('U', 0xd0, int)
 #define SNDRV_CTL_IOCTL_POWER_STATE    _IOR('U', 0xd1, int)
 
index 67a8d6b..adfbae2 100644 (file)
@@ -68,8 +68,13 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
        while (ci < cmds->cnt && ei < excludes->cnt) {
                cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
                if (cmp < 0) {
-                       zfree(&cmds->names[cj]);
-                       cmds->names[cj++] = cmds->names[ci++];
+                       if (ci == cj) {
+                               ci++;
+                               cj++;
+                       } else {
+                               zfree(&cmds->names[cj]);
+                               cmds->names[cj++] = cmds->names[ci++];
+                       }
                } else if (cmp == 0) {
                        ci++;
                        ei++;
@@ -77,10 +82,11 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
                        ei++;
                }
        }
-
-       while (ci < cmds->cnt) {
-               zfree(&cmds->names[cj]);
-               cmds->names[cj++] = cmds->names[ci++];
+       if (ci != cj) {
+               while (ci < cmds->cnt) {
+                       zfree(&cmds->names[cj]);
+                       cmds->names[cj++] = cmds->names[ci++];
+               }
        }
        for (ci = cj; ci < cmds->cnt; ci++)
                zfree(&cmds->names[ci]);
index 1b3a36f..3ca28d4 100644 (file)
@@ -417,11 +417,10 @@ class YnlFamily(SpecFamily):
         pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4)
         return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad
 
-    def _decode_enum(self, rsp, attr_spec):
-        raw = rsp[attr_spec['name']]
+    def _decode_enum(self, raw, attr_spec):
         enum = self.consts[attr_spec['enum']]
-        i = attr_spec.get('value-start', 0)
         if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']:
+            i = 0
             value = set()
             while raw:
                 if raw & 1:
@@ -429,8 +428,8 @@ class YnlFamily(SpecFamily):
                 raw >>= 1
                 i += 1
         else:
-            value = enum.entries_by_val[raw - i].name
-        rsp[attr_spec['name']] = value
+            value = enum.entries_by_val[raw].name
+        return value
 
     def _decode_binary(self, attr, attr_spec):
         if attr_spec.struct_name:
@@ -438,7 +437,7 @@ class YnlFamily(SpecFamily):
             decoded = attr.as_struct(members)
             for m in members:
                 if m.enum:
-                    self._decode_enum(decoded, m)
+                    decoded[m.name] = self._decode_enum(decoded[m.name], m)
         elif attr_spec.sub_type:
             decoded = attr.as_c_array(attr_spec.sub_type)
         else:
@@ -466,6 +465,9 @@ class YnlFamily(SpecFamily):
             else:
                 raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
 
+            if 'enum' in attr_spec:
+                decoded = self._decode_enum(decoded, attr_spec)
+
             if not attr_spec.is_multi:
                 rsp[attr_spec['name']] = decoded
             elif attr_spec.name in rsp:
@@ -473,8 +475,6 @@ class YnlFamily(SpecFamily):
             else:
                 rsp[attr_spec.name] = [decoded]
 
-            if 'enum' in attr_spec:
-                self._decode_enum(rsp, attr_spec)
         return rsp
 
     def _decode_extack_path(self, attrs, attr_set, offset, target):
index 2e1caab..c0f25d0 100644 (file)
@@ -826,3 +826,9 @@ bool arch_is_rethunk(struct symbol *sym)
 {
        return !strcmp(sym->name, "__x86_return_thunk");
 }
+
+bool arch_is_embedded_insn(struct symbol *sym)
+{
+       return !strcmp(sym->name, "retbleed_return_thunk") ||
+              !strcmp(sym->name, "srso_safe_ret");
+}
index 8936a05..1384090 100644 (file)
@@ -389,7 +389,7 @@ static int decode_instructions(struct objtool_file *file)
                if (!strcmp(sec->name, ".noinstr.text") ||
                    !strcmp(sec->name, ".entry.text") ||
                    !strcmp(sec->name, ".cpuidle.text") ||
-                   !strncmp(sec->name, ".text.__x86.", 12))
+                   !strncmp(sec->name, ".text..__x86.", 13))
                        sec->noinstr = true;
 
                /*
@@ -455,7 +455,7 @@ static int decode_instructions(struct objtool_file *file)
                                return -1;
                        }
 
-                       if (func->return_thunk || func->alias != func)
+                       if (func->embedded_insn || func->alias != func)
                                continue;
 
                        if (!find_insn(file, sec, func->offset)) {
@@ -1288,16 +1288,33 @@ static int add_ignore_alternatives(struct objtool_file *file)
        return 0;
 }
 
+/*
+ * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
+ * will be added to the .retpoline_sites section.
+ */
 __weak bool arch_is_retpoline(struct symbol *sym)
 {
        return false;
 }
 
+/*
+ * Symbols that replace INSN_RETURN, every (tail) call to such a symbol
+ * will be added to the .return_sites section.
+ */
 __weak bool arch_is_rethunk(struct symbol *sym)
 {
        return false;
 }
 
+/*
+ * Symbols that are embedded inside other instructions, because sometimes crazy
+ * code exists. These are mostly ignored for validation purposes.
+ */
+__weak bool arch_is_embedded_insn(struct symbol *sym)
+{
+       return false;
+}
+
 static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
 {
        struct reloc *reloc;
@@ -1576,14 +1593,14 @@ static int add_jump_destinations(struct objtool_file *file)
                        struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
 
                        /*
-                        * This is a special case for zen_untrain_ret().
+                        * This is a special case for retbleed_untrain_ret().
                         * It jumps to __x86_return_thunk(), but objtool
                         * can't find the thunk's starting RET
                         * instruction, because the RET is also in the
                         * middle of another instruction.  Objtool only
                         * knows about the outer instruction.
                         */
-                       if (sym && sym->return_thunk) {
+                       if (sym && sym->embedded_insn) {
                                add_return_call(file, insn, false);
                                continue;
                        }
@@ -2502,6 +2519,9 @@ static int classify_symbols(struct objtool_file *file)
                if (arch_is_rethunk(func))
                        func->return_thunk = true;
 
+               if (arch_is_embedded_insn(func))
+                       func->embedded_insn = true;
+
                if (arch_ftrace_match(func->name))
                        func->fentry = true;
 
@@ -2630,12 +2650,17 @@ static int decode_sections(struct objtool_file *file)
        return 0;
 }
 
-static bool is_fentry_call(struct instruction *insn)
+static bool is_special_call(struct instruction *insn)
 {
-       if (insn->type == INSN_CALL &&
-           insn_call_dest(insn) &&
-           insn_call_dest(insn)->fentry)
-               return true;
+       if (insn->type == INSN_CALL) {
+               struct symbol *dest = insn_call_dest(insn);
+
+               if (!dest)
+                       return false;
+
+               if (dest->fentry || dest->embedded_insn)
+                       return true;
+       }
 
        return false;
 }
@@ -3636,7 +3661,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        if (ret)
                                return ret;
 
-                       if (opts.stackval && func && !is_fentry_call(insn) &&
+                       if (opts.stackval && func && !is_special_call(insn) &&
                            !has_valid_stack_frame(&state)) {
                                WARN_INSN(insn, "call without frame pointer save/setup");
                                return 1;
index d420b5d..081befa 100644 (file)
@@ -1005,7 +1005,7 @@ struct elf *elf_open_read(const char *name, int flags)
                perror("malloc");
                return NULL;
        }
-       memset(elf, 0, offsetof(struct elf, sections));
+       memset(elf, 0, sizeof(*elf));
 
        INIT_LIST_HEAD(&elf->sections);
 
index 2b6d2ce..0b303eb 100644 (file)
@@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int *base);
 
 bool arch_is_retpoline(struct symbol *sym);
 bool arch_is_rethunk(struct symbol *sym);
+bool arch_is_embedded_insn(struct symbol *sym);
 
 int arch_rewrite_retpolines(struct objtool_file *file);
 
index c532d70..9f71e98 100644 (file)
@@ -66,6 +66,7 @@ struct symbol {
        u8 fentry            : 1;
        u8 profiling_func    : 1;
        u8 warned            : 1;
+       u8 embedded_insn     : 1;
        struct list_head pv_target;
        struct reloc *relocs;
 };
index 0609c19..c5db0de 100644 (file)
@@ -155,9 +155,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
 ifdef CSINCLUDES
   LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
 endif
-OPENCSDLIBS := -lopencsd_c_api
+OPENCSDLIBS := -lopencsd_c_api -lopencsd
 ifeq ($(findstring -static,${LDFLAGS}),-static)
-  OPENCSDLIBS += -lopencsd -lstdc++
+  OPENCSDLIBS += -lstdc++
 endif
 ifdef CSLIBS
   LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
index 561de0c..512a8f1 100644 (file)
@@ -54,10 +54,11 @@ double perf_pmu__cpu_slots_per_cycle(void)
                perf_pmu__pathname_scnprintf(path, sizeof(path),
                                             pmu->name, "caps/slots");
                /*
-                * The value of slots is not greater than 32 bits, but sysfs__read_int
-                * can't read value with 0x prefix, so use sysfs__read_ull instead.
+                * The value of slots is not greater than 32 bits, but
+                * filename__read_int can't read value with 0x prefix,
+                * so use filename__read_ull instead.
                 */
-               sysfs__read_ull(path, &slots);
+               filename__read_ull(path, &slots);
        }
 
        return slots ? (double)slots : NAN;
index 3f1886a..cfda251 100644 (file)
 448    n64     process_mrelease                sys_process_mrelease
 449    n64     futex_waitv                     sys_futex_waitv
 450    common  set_mempolicy_home_node         sys_set_mempolicy_home_node
+451    n64     cachestat                       sys_cachestat
index a0be127..8c0b08b 100644 (file)
 448    common  process_mrelease                sys_process_mrelease
 449    common  futex_waitv                     sys_futex_waitv
 450    nospu   set_mempolicy_home_node         sys_set_mempolicy_home_node
+451    common  cachestat                       sys_cachestat
index b7223fe..5f3edb3 100644 (file)
@@ -250,6 +250,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
        if (!chain || chain->nr < 3)
                return skip_slot;
 
+       addr_location__init(&al);
        ip = chain->ips[1];
 
        thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
@@ -259,6 +260,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
 
        if (!dso) {
                pr_debug("%" PRIx64 " dso is NULL\n", ip);
+               addr_location__exit(&al);
                return skip_slot;
        }
 
@@ -279,5 +281,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
                 */
                skip_slot = 3;
        }
+
+       addr_location__exit(&al);
        return skip_slot;
 }
index b68f475..a6935af 100644 (file)
 448  common    process_mrelease        sys_process_mrelease            sys_process_mrelease
 449  common    futex_waitv             sys_futex_waitv                 sys_futex_waitv
 450  common    set_mempolicy_home_node sys_set_mempolicy_home_node     sys_set_mempolicy_home_node
+451  common    cachestat               sys_cachestat                   sys_cachestat
index c84d126..227538b 100644 (file)
 448    common  process_mrelease        sys_process_mrelease
 449    common  futex_waitv             sys_futex_waitv
 450    common  set_mempolicy_home_node sys_set_mempolicy_home_node
+451    common  cachestat               sys_cachestat
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index 0f158dc..07bbc44 100644 (file)
@@ -1,5 +1,6 @@
 perf-y += sched-messaging.o
 perf-y += sched-pipe.o
+perf-y += sched-seccomp-notify.o
 perf-y += syscall.o
 perf-y += mem-functions.o
 perf-y += futex-hash.o
index 0d2b659..a0625c7 100644 (file)
@@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
 int bench_numa(int argc, const char **argv);
 int bench_sched_messaging(int argc, const char **argv);
 int bench_sched_pipe(int argc, const char **argv);
+int bench_sched_seccomp_notify(int argc, const char **argv);
 int bench_syscall_basic(int argc, const char **argv);
 int bench_syscall_getpgid(int argc, const char **argv);
 int bench_syscall_fork(int argc, const char **argv);
diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c
new file mode 100644 (file)
index 0000000..b04ebcd
--- /dev/null
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "bench.h"
+
+#include <uapi/linux/filter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/time64.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+
+#define LOOPS_DEFAULT 1000000UL
+static uint64_t loops = LOOPS_DEFAULT;
+static bool sync_mode;
+
+static const struct option options[] = {
+       OPT_U64('l', "loop",    &loops,         "Specify number of loops"),
+       OPT_BOOLEAN('s', "sync-mode", &sync_mode,
+                   "Enable the synchronious mode for seccomp notifications"),
+       OPT_END()
+};
+
+static const char * const bench_seccomp_usage[] = {
+       "perf bench sched secccomp-notify <options>",
+       NULL
+};
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+       return syscall(__NR_seccomp, op, flags, args);
+}
+
+static int user_notif_syscall(int nr, unsigned int flags)
+{
+       struct sock_filter filter[] = {
+               BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+                       offsetof(struct seccomp_data, nr)),
+               BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+               BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+       };
+
+       struct sock_fprog prog = {
+               .len = (unsigned short)ARRAY_SIZE(filter),
+               .filter = filter,
+       };
+
+       return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+#define USER_NOTIF_MAGIC INT_MAX
+static void user_notification_sync_loop(int listener)
+{
+       struct seccomp_notif_resp resp;
+       struct seccomp_notif req;
+       uint64_t nr;
+
+       for (nr = 0; nr < loops; nr++) {
+               memset(&req, 0, sizeof(req));
+               if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req))
+                       err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
+
+               if (req.data.nr != __NR_gettid)
+                       errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr);
+
+               resp.id = req.id;
+               resp.error = 0;
+               resp.val = USER_NOTIF_MAGIC;
+               resp.flags = 0;
+               if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp))
+                       err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
+       }
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+#endif
+int bench_sched_seccomp_notify(int argc, const char **argv)
+{
+       struct timeval start, stop, diff;
+       unsigned long long result_usec = 0;
+       int status, listener;
+       pid_t pid;
+       long ret;
+
+       argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);
+
+       gettimeofday(&start, NULL);
+
+       prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       listener = user_notif_syscall(__NR_gettid,
+                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
+       if (listener < 0)
+               err(EXIT_FAILURE, "can't create a notification descriptor");
+
+       pid = fork();
+       if (pid < 0)
+               err(EXIT_FAILURE, "fork");
+       if (pid == 0) {
+               if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
+                       err(EXIT_FAILURE, "can't set the parent death signal");
+               while (1) {
+                       ret = syscall(__NR_gettid);
+                       if (ret == USER_NOTIF_MAGIC)
+                               continue;
+                       break;
+               }
+               _exit(1);
+       }
+
+       if (sync_mode) {
+               if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+                            SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0))
+                       err(EXIT_FAILURE,
+                           "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");
+       }
+       user_notification_sync_loop(listener);
+
+       kill(pid, SIGKILL);
+       if (waitpid(pid, &status, 0) != pid)
+               err(EXIT_FAILURE, "waitpid(%d) failed", pid);
+       if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL)
+               errx(EXIT_FAILURE, "unexpected exit code: %d", status);
+
+       gettimeofday(&stop, NULL);
+       timersub(&stop, &start, &diff);
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               printf("# Executed %" PRIu64 " system calls\n\n",
+                       loops);
+
+               result_usec = diff.tv_sec * USEC_PER_SEC;
+               result_usec += diff.tv_usec;
+
+               printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+                      (unsigned long) diff.tv_sec,
+                      (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+               printf(" %14lf usecs/op\n",
+                      (double)result_usec / (double)loops);
+               printf(" %14d ops/sec\n",
+                      (int)((double)loops /
+                            ((double)result_usec / (double)USEC_PER_SEC)));
+               break;
+
+       case BENCH_FORMAT_SIMPLE:
+               printf("%lu.%03lu\n",
+                      (unsigned long) diff.tv_sec,
+                      (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+               break;
+
+       default:
+               /* should never be reached */
+               fprintf(stderr, "Unknown format:%d\n", bench_format);
+               exit(1);
+               break;
+       }
+
+       return 0;
+}
index db435b7..5033e8b 100644 (file)
@@ -47,6 +47,7 @@ static struct bench numa_benchmarks[] = {
 static struct bench sched_benchmarks[] = {
        { "messaging",  "Benchmark for scheduling and IPC",             bench_sched_messaging   },
        { "pipe",       "Benchmark for pipe() between two processes",   bench_sched_pipe        },
+       { "seccomp-notify",     "Benchmark for seccomp user notify",    bench_sched_seccomp_notify},
        { "all",        "Run all scheduler benchmarks",         NULL                    },
        { NULL,         NULL,                                           NULL                    }
 };
index bf5083c..4d28177 100644 (file)
   },
   {
     "MetricName": "nps1_die_to_dram",
-    "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+    "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
     "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+    "MetricConstraint": "NO_GROUP_EVENTS",
     "MetricGroup": "data_fabric",
     "PerPkg": "1",
     "ScaleUnit": "6.1e-5MiB"
index a71694a..60e1945 100644 (file)
   },
   {
     "MetricName": "nps1_die_to_dram",
-    "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+    "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
     "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+    "MetricConstraint": "NO_GROUP_EVENTS",
     "MetricGroup": "data_fabric",
     "PerPkg": "1",
     "ScaleUnit": "6.1e-5MiB"
index 988cf68..3e9e178 100644 (file)
   },
   {
     "MetricName": "nps1_die_to_dram",
-    "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+    "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
     "MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
     "MetricGroup": "data_fabric",
     "PerPkg": "1",
+    "MetricConstraint": "NO_GROUP_EVENTS",
     "ScaleUnit": "6.1e-5MiB"
   }
 ]
index b2f8284..658fb95 100644 (file)
@@ -1631,6 +1631,16 @@ static bool test__pmu_cpu_valid(void)
        return !!perf_pmus__find("cpu");
 }
 
+static bool test__pmu_cpu_event_valid(void)
+{
+       struct perf_pmu *pmu = perf_pmus__find("cpu");
+
+       if (!pmu)
+               return false;
+
+       return perf_pmu__has_format(pmu, "event");
+}
+
 static bool test__intel_pt_valid(void)
 {
        return !!perf_pmus__find("intel_pt");
@@ -2179,7 +2189,7 @@ static const struct evlist_test test__events_pmu[] = {
        },
        {
                .name  = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
-               .valid = test__pmu_cpu_valid,
+               .valid = test__pmu_cpu_event_valid,
                .check = test__checkevent_complex_name,
                /* 3 */
        },
diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
new file mode 100755 (executable)
index 0000000..319f36e
--- /dev/null
@@ -0,0 +1,83 @@
+#!/bin/bash
+# test perf probe of function from different CU
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# skip if there's no gcc
+if ! [ -x "$(command -v gcc)" ]; then
+        echo "failed: no gcc compiler"
+        exit 2
+fi
+
+temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
+
+cleanup()
+{
+       trap - EXIT TERM INT
+       if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
+               echo "--- Cleaning up ---"
+               perf probe -x ${temp_dir}/testfile -d foo || true
+               rm -f "${temp_dir}/"*
+               rmdir "${temp_dir}"
+       fi
+}
+
+trap_cleanup()
+{
+        cleanup
+        exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+cat > ${temp_dir}/testfile-foo.h << EOF
+struct t
+{
+  int *p;
+  int c;
+};
+
+extern int foo (int i, struct t *t);
+EOF
+
+cat > ${temp_dir}/testfile-foo.c << EOF
+#include "testfile-foo.h"
+
+int
+foo (int i, struct t *t)
+{
+  int j, res = 0;
+  for (j = 0; j < i && j < t->c; j++)
+    res += t->p[j];
+
+  return res;
+}
+EOF
+
+cat > ${temp_dir}/testfile-main.c << EOF
+#include "testfile-foo.h"
+
+static struct t g;
+
+int
+main (int argc, char **argv)
+{
+  int i;
+  int j[argc];
+  g.c = argc;
+  g.p = j;
+  for (i = 0; i < argc; i++)
+    j[i] = (int) argv[i][0];
+  return foo (3, &g);
+}
+EOF
+
+gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o
+gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o
+gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o
+
+perf probe -x ${temp_dir}/testfile --funcs foo
+perf probe -x ${temp_dir}/testfile foo
+
+cleanup
index 25f075f..968dddd 100644 (file)
@@ -58,9 +58,9 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _
 
        signal(SIGCHLD, sig_handler);
 
-       evlist = evlist__new_default();
+       evlist = evlist__new_dummy();
        if (evlist == NULL) {
-               pr_debug("evlist__new_default\n");
+               pr_debug("evlist__new_dummy\n");
                return -1;
        }
 
index 3bef212..39b74d8 100644 (file)
@@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
 #define        SCM_RIGHTS      0x01            /* rw: access rights (array of int) */
 #define SCM_CREDENTIALS 0x02           /* rw: struct ucred             */
 #define SCM_SECURITY   0x03            /* rw: security label           */
+#define SCM_PIDFD      0x04            /* ro: pidfd (int)              */
 
 struct ucred {
        __u32   pid;
@@ -326,6 +327,7 @@ struct ucred {
                                          */
 
 #define MSG_ZEROCOPY   0x4000000       /* Use user data in kernel path */
+#define MSG_SPLICE_PAGES 0x8000000     /* Splice the pages from the iterator in sendmsg() */
 #define MSG_FASTOPEN   0x20000000      /* Send data in TCP SYN */
 #define MSG_CMSG_CLOEXEC 0x40000000    /* Set close_on_exec for file
                                           descriptor received through
@@ -336,6 +338,9 @@ struct ucred {
 #define MSG_CMSG_COMPAT        0               /* We never have 32 bit fixups */
 #endif
 
+/* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */
+#define MSG_INTERNAL_SENDMSG_FLAGS \
+       (MSG_SPLICE_PAGES | MSG_SENDPAGE_NOPOLICY | MSG_SENDPAGE_DECRYPTED)
 
 /* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
 #define SOL_IP         0
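(Editor's note: MSG_SPLICE_PAGES and the MSG_SENDPAGE_* bits are kernel-internal, so the sendmsg()/sendmmsg() entry paths are expected to strip them from userspace-supplied flags, roughly:)

	/* sketch: internal-only flags must never be honoured from userspace */
	flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;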
index 32e552f..ce5e632 100755 (executable)
@@ -10,7 +10,7 @@ fi
 linux_mount=${linux_header_dir}/mount.h
 
 printf "static const char *move_mount_flags[] = {\n"
-regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
 grep -E $regex ${linux_mount} | \
        sed -r "s/$regex/\2 \1/g"       | \
        xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
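(Editor's note: dropping the `_` from `([^_]+_...)` lets the generator pick up flags whose suffix after MOVE_MOUNT_ is a single word. For a hypothetical single-word entry, the pipeline above would produce:)

	# input line in mount.h:
	#define MOVE_MOUNT_BENEATH		0x00000200
	# emitted by the grep | sed | xargs chain:
	[ilog2(0x00000200) + 1] = "BENEATH",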
index aa99340..ed3ff96 100644 (file)
@@ -8,6 +8,12 @@
 #ifndef MSG_WAITFORONE
 #define MSG_WAITFORONE            0x10000
 #endif
+#ifndef MSG_BATCH
+#define MSG_BATCH                 0x40000
+#endif
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY            0x4000000
+#endif
 #ifndef MSG_SPLICE_PAGES
 #define MSG_SPLICE_PAGES       0x8000000
 #endif
@@ -50,6 +56,8 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
        P_MSG_FLAG(NOSIGNAL);
        P_MSG_FLAG(MORE);
        P_MSG_FLAG(WAITFORONE);
+       P_MSG_FLAG(BATCH);
+       P_MSG_FLAG(ZEROCOPY);
        P_MSG_FLAG(SPLICE_PAGES);
        P_MSG_FLAG(FASTOPEN);
        P_MSG_FLAG(CMSG_CLOEXEC);
index 45e018c..2941d88 100644 (file)
@@ -478,8 +478,10 @@ static const char *die_get_file_name(Dwarf_Die *dw_die, int idx)
 {
        Dwarf_Die cu_die;
        Dwarf_Files *files;
+       Dwarf_Attribute attr_mem;
 
-       if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) ||
+       if (idx < 0 || !dwarf_attr_integrate(dw_die, DW_AT_decl_file, &attr_mem) ||
+           !dwarf_cu_die(attr_mem.cu, &cu_die, NULL, NULL, NULL, NULL, NULL, NULL) ||
            dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
                return NULL;
 
index 4e62843..f4cb41e 100644 (file)
@@ -45,7 +45,6 @@
 
 static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
                                     struct thread *th, bool lock);
-static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip);
 
 static struct dso *machine__kernel_dso(struct machine *machine)
 {
@@ -2385,10 +2384,6 @@ static int add_callchain_ip(struct thread *thread,
        ms.maps = maps__get(al.maps);
        ms.map = map__get(al.map);
        ms.sym = al.sym;
-
-       if (!branch && append_inlines(cursor, &ms, ip) == 0)
-               goto out;
-
        srcline = callchain_srcline(&ms, al.addr);
        err = callchain_cursor_append(cursor, ip, &ms,
                                      branch, flags, nr_loop_iter,
index 5dcfbf3..c9ec0ca 100644 (file)
@@ -1216,6 +1216,14 @@ static int config_term_pmu(struct perf_event_attr *attr,
        if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
                const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
 
+               if (!pmu) {
+                       char *err_str;
+
+                       if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
+                               parse_events_error__handle(err, term->err_term,
+                                                          err_str, /*help=*/NULL);
+                       return -EINVAL;
+               }
                if (perf_pmu__supports_legacy_cache(pmu)) {
                        attr->type = PERF_TYPE_HW_CACHE;
                        return parse_events__decode_legacy_cache(term->config, pmu->type,
@@ -2092,16 +2100,16 @@ __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
        return lhs->core.idx - rhs->core.idx;
 }
 
-static int evlist__cmp(void *state, const struct list_head *l, const struct list_head *r)
+static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct list_head *r)
 {
        const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node);
        const struct evsel *lhs = container_of(lhs_core, struct evsel, core);
        const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node);
        const struct evsel *rhs = container_of(rhs_core, struct evsel, core);
-       int *leader_idx = state;
-       int lhs_leader_idx = *leader_idx, rhs_leader_idx = *leader_idx, ret;
+       int *force_grouped_idx = _fg_idx;
+       int lhs_sort_idx, rhs_sort_idx, ret;
        const char *lhs_pmu_name, *rhs_pmu_name;
-       bool lhs_has_group = false, rhs_has_group = false;
+       bool lhs_has_group, rhs_has_group;
 
        /*
         * First sort by grouping/leader. Read the leader idx only if the evsel
@@ -2113,15 +2121,25 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
         */
        if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
                lhs_has_group = true;
-               lhs_leader_idx = lhs_core->leader->idx;
+               lhs_sort_idx = lhs_core->leader->idx;
+       } else {
+               lhs_has_group = false;
+               lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
+                       ? *force_grouped_idx
+                       : lhs_core->idx;
        }
        if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
                rhs_has_group = true;
-               rhs_leader_idx = rhs_core->leader->idx;
+               rhs_sort_idx = rhs_core->leader->idx;
+       } else {
+               rhs_has_group = false;
+               rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
+                       ? *force_grouped_idx
+                       : rhs_core->idx;
        }
 
-       if (lhs_leader_idx != rhs_leader_idx)
-               return lhs_leader_idx - rhs_leader_idx;
+       if (lhs_sort_idx != rhs_sort_idx)
+               return lhs_sort_idx - rhs_sort_idx;
 
        /* Group by PMU if there is a group. Groups can't span PMUs. */
        if (lhs_has_group && rhs_has_group) {
@@ -2138,10 +2156,10 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
 
 static int parse_events__sort_events_and_fix_groups(struct list_head *list)
 {
-       int idx = 0, unsorted_idx = -1;
+       int idx = 0, force_grouped_idx = -1;
        struct evsel *pos, *cur_leader = NULL;
        struct perf_evsel *cur_leaders_grp = NULL;
-       bool idx_changed = false;
+       bool idx_changed = false, cur_leader_force_grouped = false;
        int orig_num_leaders = 0, num_leaders = 0;
        int ret;
 
@@ -2166,12 +2184,14 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
                 */
                pos->core.idx = idx++;
 
-               if (unsorted_idx == -1 && pos == pos_leader && pos->core.nr_members < 2)
-                       unsorted_idx = pos->core.idx;
+               /* Remember an index used to sort all force-grouped events together. */

+               if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
+                   arch_evsel__must_be_in_group(pos))
+                       force_grouped_idx = pos->core.idx;
        }
 
        /* Sort events. */
-       list_sort(&unsorted_idx, list, evlist__cmp);
+       list_sort(&force_grouped_idx, list, evlist__cmp);
 
        /*
         * Recompute groups, splitting for PMUs and adding groups for events
@@ -2181,8 +2201,9 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
        list_for_each_entry(pos, list, core.node) {
                const struct evsel *pos_leader = evsel__leader(pos);
                const char *pos_pmu_name = pos->group_pmu_name;
-               const char *cur_leader_pmu_name, *pos_leader_pmu_name;
-               bool force_grouped = arch_evsel__must_be_in_group(pos);
+               const char *cur_leader_pmu_name;
+               bool pos_force_grouped = force_grouped_idx != -1 &&
+                       arch_evsel__must_be_in_group(pos);
 
                /* Reset index and nr_members. */
                if (pos->core.idx != idx)
@@ -2198,7 +2219,8 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
                        cur_leader = pos;
 
                cur_leader_pmu_name = cur_leader->group_pmu_name;
-               if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
+               if ((cur_leaders_grp != pos->core.leader &&
+                    (!pos_force_grouped || !cur_leader_force_grouped)) ||
                    strcmp(cur_leader_pmu_name, pos_pmu_name)) {
                        /* Event is for a different group/PMU than last. */
                        cur_leader = pos;
@@ -2208,14 +2230,14 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
                         * group.
                         */
                        cur_leaders_grp = pos->core.leader;
-               }
-               pos_leader_pmu_name = pos_leader->group_pmu_name;
-               if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
                        /*
-                        * Event's PMU differs from its leader's. Groups can't
-                        * span PMUs, so update leader from the group/PMU
-                        * tracker.
+                        * Avoid forcing events into groups with events that
+                        * don't need to be in the group.
                         */
+                       cur_leader_force_grouped = pos_force_grouped;
+               }
+               if (pos_leader != cur_leader) {
+                       /* The leader changed so update it. */
                        evsel__set_leader(pos, cur_leader);
                }
        }
index 7f984a7..28380e7 100644 (file)
@@ -1440,6 +1440,17 @@ void perf_pmu__del_formats(struct list_head *formats)
        }
 }
 
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name)
+{
+       struct perf_pmu_format *format;
+
+       list_for_each_entry(format, &pmu->format, list) {
+               if (!strcmp(format->name, name))
+                       return true;
+       }
+       return false;
+}
+
 bool is_pmu_core(const char *name)
 {
        return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name);
index 203b928..6b414ce 100644 (file)
@@ -234,6 +234,7 @@ int perf_pmu__new_format(struct list_head *list, char *name,
 void perf_pmu__set_format(unsigned long *bits, long from, long to);
 int perf_pmu__format_parse(int dirfd, struct list_head *head);
 void perf_pmu__del_formats(struct list_head *formats);
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
 
 bool is_pmu_core(const char *name);
 bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
index 3cd9de4..c58ba9f 100644 (file)
@@ -152,16 +152,14 @@ static void pmu_read_sysfs(bool core_only)
        }
 
        closedir(dir);
-       if (core_only) {
-               if (!list_empty(&core_pmus))
-                       read_sysfs_core_pmus = true;
-               else {
-                       if (perf_pmu__create_placeholder_core_pmu(&core_pmus))
-                               read_sysfs_core_pmus = true;
-               }
-       } else {
+       if (list_empty(&core_pmus)) {
+               if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
+                       pr_err("Failure to set up any core PMUs\n");
+       }
+       if (!list_empty(&core_pmus)) {
                read_sysfs_core_pmus = true;
-               read_sysfs_all_pmus = true;
+               if (!core_only)
+                       read_sysfs_all_pmus = true;
        }
 }
 
index 7329b33..d45d5dc 100644 (file)
@@ -931,6 +931,11 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
         */
        if (config->aggr_mode == AGGR_THREAD && config->system_wide)
                return true;
+
+       /* Tool events have the software PMU but are only gathered on one CPU. */
+       if (evsel__is_tool(counter))
+               return true;
+
        /*
         * Skip value 0 when it's an uncore event and the given aggr id
         * does not belong to the PMU cpumask.
index 374d142..c6a0a27 100644 (file)
@@ -1038,9 +1038,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
 
 static bool is_x86_retpoline(const char *name)
 {
-       const char *p = strstr(name, "__x86_indirect_thunk_");
-
-       return p == name || !strcmp(name, "__indirect_thunk_start");
+       return strstr(name, "__x86_indirect_thunk_") == name;
 }
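(Editor's illustration: strstr(name, prefix) == name is true exactly when name starts with prefix, so the check is now a pure prefix match on compiler-generated thunk names:)

	is_x86_retpoline("__x86_indirect_thunk_rax");		/* true */
	is_x86_retpoline("not__x86_indirect_thunk_rax");	/* false: not at the start */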
 
 /*
index 8a36ba5..9a10512 100644 (file)
@@ -5447,7 +5447,7 @@ unsigned int intel_model_duplicates(unsigned int model)
        case INTEL_FAM6_LAKEFIELD:
        case INTEL_FAM6_ALDERLAKE:
        case INTEL_FAM6_ALDERLAKE_L:
-       case INTEL_FAM6_ALDERLAKE_N:
+       case INTEL_FAM6_ATOM_GRACEMONT:
        case INTEL_FAM6_RAPTORLAKE:
        case INTEL_FAM6_RAPTORLAKE_P:
        case INTEL_FAM6_RAPTORLAKE_S:
index 0e78d8e..fb6ab9c 100644 (file)
@@ -999,10 +999,6 @@ static void mock_companion(struct acpi_device *adev, struct device *dev)
 #define SZ_64G (SZ_32G * 2)
 #endif
 
-#ifndef SZ_512G
-#define SZ_512G (SZ_64G * 8)
-#endif
-
 static __init int cxl_rch_init(void)
 {
        int rc, i;
index 03539d8..75ea208 100644 (file)
@@ -206,9 +206,9 @@ static noinline void __init check_new_node(struct maple_tree *mt)
                                e = i - 1;
                } else {
                        if (i >= 4)
-                               e = i - 4;
-                       else if (i == 3)
-                               e = i - 2;
+                               e = i - 3;
+                       else if (i >= 1)
+                               e = i - 1;
                        else
                                e = 0;
                }
index a61c7bc..63f468b 100644 (file)
@@ -177,7 +177,7 @@ void regression1_test(void)
        nr_threads = 2;
        pthread_barrier_init(&worker_barrier, NULL, nr_threads);
 
-       threads = malloc(nr_threads * sizeof(pthread_t *));
+       threads = malloc(nr_threads * sizeof(*threads));
 
        for (i = 0; i < nr_threads; i++) {
                arg = i;
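(Editor's note: deriving the allocation size from the pointee, sizeof(*threads), keeps it tied to the element type. The old sizeof(pthread_t *) under-allocates on any ABI where pthread_t is wider than a pointer:)

	pthread_t *threads = malloc(nr_threads * sizeof(*threads));
	/* == nr_threads * sizeof(pthread_t), whatever pthread_t is */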
index 666b56f..8dca8ac 100644 (file)
@@ -18,6 +18,7 @@ TARGETS += drivers/net/bonding
 TARGETS += drivers/net/team
 TARGETS += efivarfs
 TARGETS += exec
+TARGETS += fchmodat2
 TARGETS += filesystems
 TARGETS += filesystems/binderfs
 TARGETS += filesystems/epoll
index 71931b2..357adc7 100644 (file)
@@ -47,10 +47,8 @@ static int read_patterns(void)
 
                sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i);
                fp = fopen(pf, "r");
-               if (!fp) {
-                       fclose(fpl);
+               if (!fp)
                        return -1;
-               }
                fread(patterns[i].buf, 1, patterns[i].len, fp);
                fclose(fp);
        }
index 9460cbe..28b93ca 100644 (file)
@@ -19,6 +19,8 @@ CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
 
 CFLAGS += $(KHDR_INCLUDES)
 
+CFLAGS += -I$(top_srcdir)/tools/include
+
 export CFLAGS
 export top_srcdir
 
@@ -42,7 +44,7 @@ run_tests: all
        done
 
 # Avoid any output on non arm64 on emit_tests
-emit_tests: all
+emit_tests:
        @for DIR in $(ARM64_SUBTARGETS); do                             \
                BUILD_TARGET=$(OUTPUT)/$$DIR;                   \
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;         \
index d4ad813..e3d2628 100644 (file)
 
 #include "../../kselftest.h"
 
-#define TESTS_PER_HWCAP 2
+#define TESTS_PER_HWCAP 3
 
 /*
- * Function expected to generate SIGILL when the feature is not
- * supported and return when it is supported. If SIGILL is generated
- * then the handler must be able to skip over the instruction safely.
+ * Function expected to generate an exception when the feature is not
+ * supported and return when it is supported. If the specific exception
+ * is generated then the handler must be able to skip over the
+ * instruction safely.
  *
  * Note that it is expected that for many architecture extensions
  * there are no specific traps due to no architecture state being
  * added so we may not fault if running on a kernel which doesn't know
  * to add the hwcap.
  */
-typedef void (*sigill_fn)(void);
+typedef void (*sig_fn)(void);
+
+static void aes_sigill(void)
+{
+       /* AESE V0.16B, V0.16B */
+       asm volatile(".inst 0x4e284800" : : : );
+}
+
+static void atomics_sigill(void)
+{
+       /* STADD W0, [SP] */
+       asm volatile(".inst 0xb82003ff" : : : );
+}
+
+static void crc32_sigill(void)
+{
+       /* CRC32W W0, W0, W1 */
+       asm volatile(".inst 0x1ac14800" : : : );
+}
 
 static void cssc_sigill(void)
 {
@@ -39,6 +58,29 @@ static void cssc_sigill(void)
        asm volatile(".inst 0xdac01c00" : : : "x0");
 }
 
+static void fp_sigill(void)
+{
+       asm volatile("fmov s0, #1");
+}
+
+static void ilrcpc_sigill(void)
+{
+       /* LDAPUR W0, [SP, #8] */
+       asm volatile(".inst 0x994083e0" : : : );
+}
+
+static void jscvt_sigill(void)
+{
+       /* FJCVTZS W0, D0 */
+       asm volatile(".inst 0x1e7e0000" : : : );
+}
+
+static void lrcpc_sigill(void)
+{
+       /* LDAPR W0, [SP, #0] */
+       asm volatile(".inst 0xb8bfc3e0" : : : );
+}
+
 static void mops_sigill(void)
 {
        char dst[1], src[1];
@@ -53,11 +95,35 @@ static void mops_sigill(void)
                     : "cc", "memory");
 }
 
+static void pmull_sigill(void)
+{
+       /* PMULL V0.1Q, V0.1D, V0.1D */
+       asm volatile(".inst 0x0ee0e000" : : : );
+}
+
 static void rng_sigill(void)
 {
        asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
 }
 
+static void sha1_sigill(void)
+{
+       /* SHA1H S0, S0 */
+       asm volatile(".inst 0x5e280800" : : : );
+}
+
+static void sha2_sigill(void)
+{
+       /* SHA256H Q0, Q0, V0.4S */
+       asm volatile(".inst 0x5e004000" : : : );
+}
+
+static void sha512_sigill(void)
+{
+       /* SHA512H Q0, Q0, V0.2D */
+       asm volatile(".inst 0xce608000" : : : );
+}
+
 static void sme_sigill(void)
 {
        /* RDSVL x0, #0 */
@@ -208,15 +274,46 @@ static void svebf16_sigill(void)
        asm volatile(".inst 0x658aa000" : : : "z0");
 }
 
+static void hbc_sigill(void)
+{
+       /* BC.EQ +4 */
+       asm volatile("cmp xzr, xzr\n"
+                    ".inst 0x54000030" : : : "cc");
+}
+
+static void uscat_sigbus(void)
+{
+       /* unaligned atomic access */
+       asm volatile("ADD x1, sp, #2" : : : );
+       /* STADD W0, [X1] */
+       asm volatile(".inst 0xb820003f" : : : );
+}
+
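+(Editor's note: the tests encode instructions with .inst so they assemble even where the toolchain lacks the extension. The hwcap bits they validate are the same ones ordinary userspace would consult; a minimal check, using only standard glibc/kernel interfaces:)
+
+	#include <sys/auxv.h>
+	#include <asm/hwcap.h>
+
+	/* sketch: runtime feature detection that these tests keep honest */
+	if (getauxval(AT_HWCAP) & HWCAP_AES)
+		run_aes_path();	/* hypothetical: AES instructions are safe here */
+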
 static const struct hwcap_data {
        const char *name;
        unsigned long at_hwcap;
        unsigned long hwcap_bit;
        const char *cpuinfo;
-       sigill_fn sigill_fn;
+       sig_fn sigill_fn;
        bool sigill_reliable;
+       sig_fn sigbus_fn;
+       bool sigbus_reliable;
 } hwcaps[] = {
        {
+               .name = "AES",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_AES,
+               .cpuinfo = "aes",
+               .sigill_fn = aes_sigill,
+       },
+       {
+               .name = "CRC32",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_CRC32,
+               .cpuinfo = "crc32",
+               .sigill_fn = crc32_sigill,
+       },
+       {
                .name = "CSSC",
                .at_hwcap = AT_HWCAP2,
                .hwcap_bit = HWCAP2_CSSC,
@@ -224,6 +321,50 @@ static const struct hwcap_data {
                .sigill_fn = cssc_sigill,
        },
        {
+               .name = "FP",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_FP,
+               .cpuinfo = "fp",
+               .sigill_fn = fp_sigill,
+       },
+       {
+               .name = "JSCVT",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_JSCVT,
+               .cpuinfo = "jscvt",
+               .sigill_fn = jscvt_sigill,
+       },
+       {
+               .name = "LRCPC",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_LRCPC,
+               .cpuinfo = "lrcpc",
+               .sigill_fn = lrcpc_sigill,
+       },
+       {
+               .name = "LRCPC2",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_ILRCPC,
+               .cpuinfo = "ilrcpc",
+               .sigill_fn = ilrcpc_sigill,
+       },
+       {
+               .name = "LSE",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_ATOMICS,
+               .cpuinfo = "atomics",
+               .sigill_fn = atomics_sigill,
+       },
+       {
+               .name = "LSE2",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_USCAT,
+               .cpuinfo = "uscat",
+               .sigill_fn = atomics_sigill,
+               .sigbus_fn = uscat_sigbus,
+               .sigbus_reliable = true,
+       },
+       {
                .name = "MOPS",
                .at_hwcap = AT_HWCAP2,
                .hwcap_bit = HWCAP2_MOPS,
@@ -232,6 +373,13 @@ static const struct hwcap_data {
                .sigill_reliable = true,
        },
        {
+               .name = "PMULL",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_PMULL,
+               .cpuinfo = "pmull",
+               .sigill_fn = pmull_sigill,
+       },
+       {
                .name = "RNG",
                .at_hwcap = AT_HWCAP2,
                .hwcap_bit = HWCAP2_RNG,
@@ -245,6 +393,27 @@ static const struct hwcap_data {
                .cpuinfo = "rprfm",
        },
        {
+               .name = "SHA1",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_SHA1,
+               .cpuinfo = "sha1",
+               .sigill_fn = sha1_sigill,
+       },
+       {
+               .name = "SHA2",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_SHA2,
+               .cpuinfo = "sha2",
+               .sigill_fn = sha2_sigill,
+       },
+       {
+               .name = "SHA512",
+               .at_hwcap = AT_HWCAP,
+               .hwcap_bit = HWCAP_SHA512,
+               .cpuinfo = "sha512",
+               .sigill_fn = sha512_sigill,
+       },
+       {
                .name = "SME",
                .at_hwcap = AT_HWCAP2,
                .hwcap_bit = HWCAP2_SME,
@@ -386,20 +555,32 @@ static const struct hwcap_data {
                .hwcap_bit = HWCAP2_SVE_EBF16,
                .cpuinfo = "sveebf16",
        },
+       {
+               .name = "HBC",
+               .at_hwcap = AT_HWCAP2,
+               .hwcap_bit = HWCAP2_HBC,
+               .cpuinfo = "hbc",
+               .sigill_fn = hbc_sigill,
+               .sigill_reliable = true,
+       },
 };
 
-static bool seen_sigill;
-
-static void handle_sigill(int sig, siginfo_t *info, void *context)
-{
-       ucontext_t *uc = context;
-
-       seen_sigill = true;
-
-       /* Skip over the offending instruction */
-       uc->uc_mcontext.pc += 4;
+typedef void (*sighandler_fn)(int, siginfo_t *, void *);
+
+#define DEF_SIGHANDLER_FUNC(SIG, NUM)                                  \
+static bool seen_##SIG;                                                        \
+static void handle_##SIG(int sig, siginfo_t *info, void *context)      \
+{                                                                      \
+       ucontext_t *uc = context;                                       \
+                                                                       \
+       seen_##SIG = true;                                              \
+       /* Skip over the offending instruction */                       \
+       uc->uc_mcontext.pc += 4;                                        \
 }
 
+DEF_SIGHANDLER_FUNC(sigill, SIGILL);
+DEF_SIGHANDLER_FUNC(sigbus, SIGBUS);
+
 bool cpuinfo_present(const char *name)
 {
        FILE *f;
@@ -442,24 +623,77 @@ bool cpuinfo_present(const char *name)
        return false;
 }
 
-int main(void)
+static int install_sigaction(int signum, sighandler_fn handler)
 {
-       const struct hwcap_data *hwcap;
-       int i, ret;
-       bool have_cpuinfo, have_hwcap;
+       int ret;
        struct sigaction sa;
 
-       ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
-
        memset(&sa, 0, sizeof(sa));
-       sa.sa_sigaction = handle_sigill;
+       sa.sa_sigaction = handler;
        sa.sa_flags = SA_RESTART | SA_SIGINFO;
        sigemptyset(&sa.sa_mask);
-       ret = sigaction(SIGILL, &sa, NULL);
+       ret = sigaction(signum, &sa, NULL);
        if (ret < 0)
-               ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n",
+               ksft_exit_fail_msg("Failed to install SIGNAL handler: %s (%d)\n",
+                                  strerror(errno), errno);
+
+       return ret;
+}
+
+static void uninstall_sigaction(int signum)
+{
+       if (sigaction(signum, NULL, NULL) < 0)
+               ksft_exit_fail_msg("Failed to uninstall SIGNAL handler: %s (%d)\n",
                                   strerror(errno), errno);
+}
+
+#define DEF_INST_RAISE_SIG(SIG, NUM)                                   \
+static bool inst_raise_##SIG(const struct hwcap_data *hwcap,           \
+                               bool have_hwcap)                        \
+{                                                                      \
+       if (!hwcap->SIG##_fn) {                                         \
+               ksft_test_result_skip(#SIG"_%s\n", hwcap->name);        \
+               /* assume that it would raise an exception by default */ \
+               return true;                                            \
+       }                                                               \
+                                                                       \
+       install_sigaction(NUM, handle_##SIG);                           \
+                                                                       \
+       seen_##SIG = false;                                             \
+       hwcap->SIG##_fn();                                              \
+                                                                       \
+       if (have_hwcap) {                                               \
+               /* Should be able to use the extension */               \
+               ksft_test_result(!seen_##SIG,                           \
+                               #SIG"_%s\n", hwcap->name);              \
+       } else if (hwcap->SIG##_reliable) {                             \
+               /* Guaranteed a SIGNAL */                               \
+               ksft_test_result(seen_##SIG,                            \
+                               #SIG"_%s\n", hwcap->name);              \
+       } else {                                                        \
+               /* Missing SIGNAL might be fine */                      \
+               ksft_print_msg(#SIG" %sreported for %s\n",              \
+                               seen_##SIG ? "" : "not ",               \
+                               hwcap->name);                           \
+               ksft_test_result_skip(#SIG"_%s\n",                      \
+                                       hwcap->name);                   \
+       }                                                               \
+                                                                       \
+       uninstall_sigaction(NUM);                                       \
+       return seen_##SIG;                                              \
+}
+
+DEF_INST_RAISE_SIG(sigill, SIGILL);
+DEF_INST_RAISE_SIG(sigbus, SIGBUS);
+
+int main(void)
+{
+       int i;
+       const struct hwcap_data *hwcap;
+       bool have_cpuinfo, have_hwcap, raise_sigill;
+
+       ksft_print_header();
+       ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
 
        for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
                hwcap = &hwcaps[i];
@@ -473,30 +707,15 @@ int main(void)
                ksft_test_result(have_hwcap == have_cpuinfo,
                                 "cpuinfo_match_%s\n", hwcap->name);
 
-               if (hwcap->sigill_fn) {
-                       seen_sigill = false;
-                       hwcap->sigill_fn();
-
-                       if (have_hwcap) {
-                               /* Should be able to use the extension */
-                               ksft_test_result(!seen_sigill, "sigill_%s\n",
-                                                hwcap->name);
-                       } else if (hwcap->sigill_reliable) {
-                               /* Guaranteed a SIGILL */
-                               ksft_test_result(seen_sigill, "sigill_%s\n",
-                                                hwcap->name);
-                       } else {
-                               /* Missing SIGILL might be fine */
-                               ksft_print_msg("SIGILL %sreported for %s\n",
-                                              seen_sigill ? "" : "not ",
-                                              hwcap->name);
-                               ksft_test_result_skip("sigill_%s\n",
-                                                     hwcap->name);
-                       }
-               } else {
-                       ksft_test_result_skip("sigill_%s\n",
-                                             hwcap->name);
-               }
+               /*
+                * Testing for SIGBUS only makes sense after making sure
+                * that the instruction does not cause a SIGILL signal.
+                */
+               raise_sigill = inst_raise_sigill(hwcap, have_hwcap);
+               if (!raise_sigill)
+                       inst_raise_sigbus(hwcap, have_hwcap);
+               else
+                       ksft_test_result_skip("sigbus_%s\n", hwcap->name);
        }
 
        ksft_print_cnts();
index 18cc123..d704511 100644 (file)
 
 #include "syscall-abi.h"
 
+/*
+ * The kernel defines a much larger SVE_VQ_MAX than is expressible in
+ * the architecture; this creates a *lot* of overhead filling the
+ * buffers (especially ZA) on emulated platforms, so use the actual
+ * architectural maximum instead.
+ */
+#define ARCH_SVE_VQ_MAX 16
+
 static int default_sme_vl;
 
 static int sve_vl_count;
-static unsigned int sve_vls[SVE_VQ_MAX];
+static unsigned int sve_vls[ARCH_SVE_VQ_MAX];
 static int sme_vl_count;
-static unsigned int sme_vls[SVE_VQ_MAX];
+static unsigned int sme_vls[ARCH_SVE_VQ_MAX];
 
 extern void do_syscall(int sve_vl, int sme_vl);
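(Editor's note on the arithmetic: one vector quadword (VQ) is 128 bits, so a vector length in bytes is vq * 16; ARCH_SVE_VQ_MAX = 16 therefore matches the architectural ceiling of 2048-bit vectors, versus the kernel's much larger SVE_VQ_MAX bookkeeping constant:)

	/* sketch: the VL arithmetic assumed by the prctl() calls below */
	#define VL_BYTES(vq)	((vq) * 16)	/* ARCH_SVE_VQ_MAX -> 256 bytes */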
 
@@ -130,9 +138,9 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
 
 #define SVE_Z_SHARED_BYTES (128 / 8)
 
-static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)];
-uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
-uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)];
+static uint8_t z_zero[__SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                    uint64_t svcr)
@@ -190,8 +198,8 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
        return errors;
 }
 
-uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
-uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)];
+uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                    uint64_t svcr)
@@ -222,8 +230,8 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
        return errors;
 }
 
-uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)];
-uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)];
+uint8_t ffr_in[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t ffr_out[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                      uint64_t svcr)
@@ -300,8 +308,8 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
        return errors;
 }
 
-uint8_t za_in[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
-uint8_t za_out[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+uint8_t za_in[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t za_out[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
 
 static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
                     uint64_t svcr)
@@ -470,9 +478,9 @@ void sve_count_vls(void)
                return;
 
        /*
-        * Enumerate up to SVE_VQ_MAX vector lengths
+        * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
         */
-       for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
+       for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
                vl = prctl(PR_SVE_SET_VL, vq * 16);
                if (vl == -1)
                        ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
@@ -496,9 +504,9 @@ void sme_count_vls(void)
                return;
 
        /*
-        * Enumerate up to SVE_VQ_MAX vector lengths
+        * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
         */
-       for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
+       for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
                vl = prctl(PR_SME_SET_VL, vq * 16);
                if (vl == -1)
                        ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
index ccdac41..05e4ee5 100644 (file)
@@ -2,8 +2,6 @@
 
 TEST_GEN_PROGS := btitest nobtitest
 
-PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
-
 # These tests are built as freestanding binaries since otherwise BTI
 # support in ld.so is required which is not currently widespread; when
 # it is available it will still be useful to test this separately as the
@@ -18,44 +16,41 @@ CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
 BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
 NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
 
-%-bti.o: %.c
+$(OUTPUT)/%-bti.o: %.c
        $(BTI_CC_COMMAND)
 
-%-bti.o: %.S
+$(OUTPUT)/%-bti.o: %.S
        $(BTI_CC_COMMAND)
 
-%-nobti.o: %.c
+$(OUTPUT)/%-nobti.o: %.c
        $(NOBTI_CC_COMMAND)
 
-%-nobti.o: %.S
+$(OUTPUT)/%-nobti.o: %.S
        $(NOBTI_CC_COMMAND)
 
 BTI_OBJS =                                      \
-       test-bti.o                           \
-       signal-bti.o                            \
-       start-bti.o                             \
-       syscall-bti.o                           \
-       system-bti.o                            \
-       teststubs-bti.o                         \
-       trampoline-bti.o
-gen/btitest: $(BTI_OBJS)
+       $(OUTPUT)/test-bti.o                    \
+       $(OUTPUT)/signal-bti.o                  \
+       $(OUTPUT)/start-bti.o                   \
+       $(OUTPUT)/syscall-bti.o                 \
+       $(OUTPUT)/system-bti.o                  \
+       $(OUTPUT)/teststubs-bti.o               \
+       $(OUTPUT)/trampoline-bti.o
+$(OUTPUT)/btitest: $(BTI_OBJS)
        $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 NOBTI_OBJS =                                    \
-       test-nobti.o                         \
-       signal-nobti.o                          \
-       start-nobti.o                           \
-       syscall-nobti.o                         \
-       system-nobti.o                          \
-       teststubs-nobti.o                       \
-       trampoline-nobti.o
-gen/nobtitest: $(NOBTI_OBJS)
+       $(OUTPUT)/test-nobti.o                  \
+       $(OUTPUT)/signal-nobti.o                \
+       $(OUTPUT)/start-nobti.o                 \
+       $(OUTPUT)/syscall-nobti.o               \
+       $(OUTPUT)/system-nobti.o                \
+       $(OUTPUT)/teststubs-nobti.o             \
+       $(OUTPUT)/trampoline-nobti.o
+$(OUTPUT)/nobtitest: $(NOBTI_OBJS)
        $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
 
 # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
 # to account for any OUTPUT target-dirs optionally provided by
 # the toplevel makefile
 include ../../lib.mk
-
-$(TEST_GEN_PROGS): $(PROGS)
-       cp $(PROGS) $(OUTPUT)/
diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h
deleted file mode 100644 (file)
index ebb6204..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2019  Arm Limited
- * Original author: Dave Martin <Dave.Martin@arm.com>
- */
-
-#ifndef COMPILER_H
-#define COMPILER_H
-
-#define __always_unused __attribute__((__unused__))
-#define __noreturn __attribute__((__noreturn__))
-#define __unreachable() __builtin_unreachable()
-
-/* curse(e) has value e, but the compiler cannot assume so */
-#define curse(e) ({                            \
-       __typeof__(e) __curse_e = (e);          \
-       asm ("" : "+r" (__curse_e));            \
-       __curse_e;                              \
-})
-
-#endif /* ! COMPILER_H */
diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore
deleted file mode 100644 (file)
index 73869fa..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-btitest
-nobtitest
index 6385d8d..93d772b 100644 (file)
@@ -8,12 +8,10 @@
 
 #include <asm/unistd.h>
 
-#include "compiler.h"
-
 void __noreturn exit(int n)
 {
        syscall(__NR_exit, n);
-       __unreachable();
+       unreachable();
 }
 
 ssize_t write(int fd, const void *buf, size_t size)
index aca1185..2e9ee12 100644 (file)
@@ -14,12 +14,12 @@ typedef __kernel_size_t size_t;
 typedef __kernel_ssize_t ssize_t;
 
 #include <linux/errno.h>
+#include <linux/compiler.h>
+
 #include <asm/hwcap.h>
 #include <asm/ptrace.h>
 #include <asm/unistd.h>
 
-#include "compiler.h"
-
 long syscall(int nr, ...);
 
 void __noreturn exit(int n);
index 2cd8dce..28a8e8a 100644 (file)
@@ -17,7 +17,6 @@
 typedef struct ucontext ucontext_t;
 
 #include "btitest.h"
-#include "compiler.h"
 #include "signal.h"
 
 #define EXPECTED_TESTS 18
index 9bcfcdc..5f648b9 100644 (file)
@@ -6,6 +6,7 @@
 #include <assert.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -39,9 +40,11 @@ struct vec_data {
        int max_vl;
 };
 
+#define VEC_SVE 0
+#define VEC_SME 1
 
 static struct vec_data vec_data[] = {
-       {
+       [VEC_SVE] = {
                .name = "SVE",
                .hwcap_type = AT_HWCAP,
                .hwcap = HWCAP_SVE,
@@ -51,7 +54,7 @@ static struct vec_data vec_data[] = {
                .prctl_set = PR_SVE_SET_VL,
                .default_vl_file = "/proc/sys/abi/sve_default_vector_length",
        },
-       {
+       [VEC_SME] = {
                .name = "SME",
                .hwcap_type = AT_HWCAP2,
                .hwcap = HWCAP2_SME,
@@ -551,7 +554,8 @@ static void prctl_set_onexec(struct vec_data *data)
 /* For each VQ verify that setting via prctl() does the right thing */
 static void prctl_set_all_vqs(struct vec_data *data)
 {
-       int ret, vq, vl, new_vl;
+       int ret, vq, vl, new_vl, i;
+       int orig_vls[ARRAY_SIZE(vec_data)];
        int errors = 0;
 
        if (!data->min_vl || !data->max_vl) {
@@ -560,6 +564,9 @@ static void prctl_set_all_vqs(struct vec_data *data)
                return;
        }
 
+       for (i = 0; i < ARRAY_SIZE(vec_data); i++)
+               orig_vls[i] = vec_data[i].rdvl();
+
        for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
                vl = sve_vl_from_vq(vq);
 
@@ -582,6 +589,22 @@ static void prctl_set_all_vqs(struct vec_data *data)
                        errors++;
                }
 
+               /* Did any other VLs change? */
+               for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+                       if (&vec_data[i] == data)
+                               continue;
+
+                       if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap))
+                               continue;
+
+                       if (vec_data[i].rdvl() != orig_vls[i]) {
+                               ksft_print_msg("%s VL changed from %d to %d\n",
+                                              vec_data[i].name, orig_vls[i],
+                                              vec_data[i].rdvl());
+                               errors++;
+                       }
+               }
+
                /* Was that the VL we asked for? */
                if (new_vl == vl)
                        continue;
@@ -644,18 +667,107 @@ static const test_type tests[] = {
        prctl_set_all_vqs,
 };
 
+static inline void smstart(void)
+{
+       asm volatile("msr S0_3_C4_C7_3, xzr");
+}
+
+static inline void smstart_sm(void)
+{
+       asm volatile("msr S0_3_C4_C3_3, xzr");
+}
+
+static inline void smstop(void)
+{
+       asm volatile("msr S0_3_C4_C6_3, xzr");
+}
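+
+/*
+ * Editor's reading, hedged: the raw system-register writes above avoid
+ * requiring SME assembler support and are assumed to correspond to the
+ * SME mode-switch instructions as follows:
+ *   msr S0_3_C4_C7_3, xzr  ~  SMSTART     (streaming mode + ZA on)
+ *   msr S0_3_C4_C3_3, xzr  ~  SMSTART SM  (streaming mode only)
+ *   msr S0_3_C4_C6_3, xzr  ~  SMSTOP      (streaming mode + ZA off)
+ */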
+
+
+/*
+ * Verify we can change the SVE vector length while SME is active and
+ * continue to use SME afterwards.
+ */
+static void change_sve_with_za(void)
+{
+       struct vec_data *sve_data = &vec_data[VEC_SVE];
+       bool pass = true;
+       int ret, i;
+
+       if (sve_data->min_vl == sve_data->max_vl) {
+               ksft_print_msg("Only one SVE VL supported, can't change\n");
+               ksft_test_result_skip("change_sve_while_sme\n");
+               return;
+       }
+
+       /* Ensure we will trigger a change when we set the maximum */
+       ret = prctl(sve_data->prctl_set, sve_data->min_vl);
+       if (ret != sve_data->min_vl) {
+               ksft_print_msg("Failed to set SVE VL %d: %d\n",
+                              sve_data->min_vl, ret);
+               pass = false;
+       }
+
+       /* Enable SM and ZA */
+       smstart();
+
+       /* Trigger another VL change */
+       ret = prctl(sve_data->prctl_set, sve_data->max_vl);
+       if (ret != sve_data->max_vl) {
+               ksft_print_msg("Failed to set SVE VL %d: %d\n",
+                              sve_data->max_vl, ret);
+               pass = false;
+       }
+
+       /*
+        * Spin for a bit with SM enabled to try to trigger another
+        * save/restore.  We can't use syscalls without exiting
+        * streaming mode.
+        */
+       for (i = 0; i < 100000000; i++)
+               smstart_sm();
+
+       /*
+        * TODO: Verify that ZA was preserved over the VL change and
+        * spin.
+        */
+
+       /* Clean up after ourselves */
+       smstop();
+       ret = prctl(sve_data->prctl_set, sve_data->default_vl);
+       if (ret != sve_data->default_vl) {
+               ksft_print_msg("Failed to restore SVE VL %d: %d\n",
+                              sve_data->default_vl, ret);
+               pass = false;
+       }
+
+       ksft_test_result(pass, "change_sve_with_za\n");
+}
+
+typedef void (*test_all_type)(void);
+
+static const struct {
+       const char *name;
+       test_all_type test;
+}  all_types_tests[] = {
+       { "change_sve_with_za", change_sve_with_za },
+};
+
 int main(void)
 {
+       bool all_supported = true;
        int i, j;
 
        ksft_print_header();
-       ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data));
+       ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data) +
+                     ARRAY_SIZE(all_types_tests));
 
        for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
                struct vec_data *data = &vec_data[i];
                unsigned long supported;
 
                supported = getauxval(data->hwcap_type) & data->hwcap;
+               if (!supported)
+                       all_supported = false;
 
                for (j = 0; j < ARRAY_SIZE(tests); j++) {
                        if (supported)
@@ -666,5 +778,12 @@ int main(void)
                }
        }
 
+       for (i = 0; i < ARRAY_SIZE(all_types_tests); i++) {
+               if (all_supported)
+                       all_types_tests[i].test();
+               else
+                       ksft_test_result_skip("%s\n", all_types_tests[i].name);
+       }
+
        ksft_exit_pass();
 }
index 222093f..762c8fe 100644 (file)
@@ -8,6 +8,8 @@
 #include <stdio.h>
 #include <string.h>
 
+#include <linux/compiler.h>
+
 #include "test_signals.h"
 
 int test_init(struct tdescr *td);
@@ -60,13 +62,25 @@ static __always_inline bool get_current_context(struct tdescr *td,
                                                size_t dest_sz)
 {
        static volatile bool seen_already;
+       int i;
+       char *uc = (char *)dest_uc;
 
        assert(td && dest_uc);
        /* it's a genuine invocation..reinit */
        seen_already = 0;
        td->live_uc_valid = 0;
        td->live_sz = dest_sz;
-       memset(dest_uc, 0x00, td->live_sz);
+
+       /*
+        * This is a memset() but we don't want the compiler to
+        * optimise it into either instructions or a library call
+        * which might be incompatible with streaming mode.
+        */
+       for (i = 0; i < td->live_sz; i++) {
+               uc[i] = 0;
+               OPTIMIZER_HIDE_VAR(uc[0]);
+       }
+
        td->live_uc = dest_uc;
        /*
         * Grab ucontext_t triggering a SIGTRAP.
@@ -104,6 +118,17 @@ static __always_inline bool get_current_context(struct tdescr *td,
                      : "memory");
 
        /*
+        * If we were grabbing a streaming mode context then we may
+        * have entered streaming mode behind the system's back and
+        * libc or compiler generated code might decide to do
+        * something invalid in streaming mode, or might corrupt the
+        * state of ZA.  Issue an SMSTOP to exit both now that we have
+        * grabbed the state.
+        */
+       if (td->feats_supported & FEAT_SME)
+               asm volatile("msr S0_3_C4_C6_3, xzr");
+
+       /*
         * If we get here with seen_already==1 it implies the td->live_uc
         * context has been used to get back here....this probably means
         * a test has failed to cause a SEGV...anyway live_uc does not
index e1eb4d5..2e384d7 100644 (file)
@@ -65,6 +65,7 @@ int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
        if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
                   ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
                fprintf(stderr, "ZT data invalid\n");
+               free(zeros);
                return 1;
        }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
new file mode 100644 (file)
index 0000000..118abc2
--- /dev/null
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "async_stack_depth.skel.h"
+
+void test_async_stack_depth(void)
+{
+       RUN_TESTS(async_stack_depth);
+}
index b4f6f3a..5674a9d 100644 (file)
@@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
        xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
 }
 
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+       int s, c0, c1, p0, p1;
+       int err, n, key, value;
+       char buf[] = "abc";
+
+       key = 0;
+       value = sizeof(buf) - 1;
+       err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+       if (err)
+               return;
+
+       s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+       if (s < 0)
+               goto clean_parser_map;
+
+       err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+       if (err)
+               goto close_srv;
+
+       err = add_to_sockmap(sock_map, p0, p1);
+       if (err)
+               goto close;
+
+       n = xsend(c1, buf, sizeof(buf), 0);
+       if (n < sizeof(buf))
+               FAIL("incomplete write");
+
+       n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+       if (n != sizeof(buf) - 1)
+               FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+       xclose(c0);
+       xclose(p0);
+       xclose(c1);
+       xclose(p1);
+close_srv:
+       xclose(s);
+
+clean_parser_map:
+       key = 0;
+       value = 0;
+       xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+                                  struct bpf_map *inner_map, int family,
+                                  int sotype)
+{
+       int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+       int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+       int parser_map = bpf_map__fd(skel->maps.parser_map);
+       int sock_map = bpf_map__fd(inner_map);
+       int err;
+
+       err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+       if (err)
+               return;
+
+       err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+       if (err)
+               goto detach;
+
+       redir_partial(family, sotype, sock_map, parser_map);
+
+       xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+       xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
 static void test_reuseport_select_listening(int family, int sotype,
                                            int sock_map, int verd_map,
                                            int reuseport_prog)
@@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
        } tests[] = {
                TEST(test_skb_redir_to_connected),
                TEST(test_skb_redir_to_listening),
+               TEST(test_skb_redir_partial),
                TEST(test_msg_redir_to_connected),
                TEST(test_msg_redir_to_listening),
        };
@@ -1432,7 +1504,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
        if (n < 1)
                goto out;
 
-       n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+       n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
        if (n < 0)
                FAIL("%s: recv() err, errno=%d", log_prefix, errno);
        if (n == 0)
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
new file mode 100644 (file)
index 0000000..3517c0e
--- /dev/null
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct hmap_elem {
+       struct bpf_timer timer;
+};
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 64);
+       __type(key, int);
+       __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+__attribute__((noinline))
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+       volatile char buf[256] = {};
+       return buf[69];
+}
+
+__attribute__((noinline))
+static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+       volatile char buf[300] = {};
+       return buf[255] + timer_cb(NULL, NULL, NULL);
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is 576. Too large")
+int pseudo_call_check(struct __sk_buff *ctx)
+{
+       struct hmap_elem *elem;
+       volatile char buf[256] = {};
+
+       elem = bpf_map_lookup_elem(&hmap, &(int){0});
+       if (!elem)
+               return 0;
+
+       timer_cb(NULL, NULL, NULL);
+       return bpf_timer_set_callback(&elem->timer, timer_cb) + buf[0];
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is 608. Too large")
+int async_call_root_check(struct __sk_buff *ctx)
+{
+       struct hmap_elem *elem;
+       volatile char buf[256] = {};
+
+       elem = bpf_map_lookup_elem(&hmap, &(int){0});
+       if (!elem)
+               return 0;
+
+       return bpf_timer_set_callback(&elem->timer, bad_timer_cb) + buf[0];
+}
+
+char _license[] SEC("license") = "GPL";
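(Editor's note: the expected verifier messages follow from the kernel's MAX_BPF_STACK limit of 512 bytes being applied to the combined call chain, which this test asserts also covers async timer callbacks, so both totals above must be rejected:)

	/* sketch: the invariant under test */
	#define MAX_BPF_STACK	512	/* real kernel constant */
	/* prog frame + callback frame: 576 > 512 and 608 > 512 -> load fails */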
index 325c9f1..464d35b 100644 (file)
@@ -28,12 +28,26 @@ struct {
        __type(value, unsigned int);
 } verdict_map SEC(".maps");
 
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(max_entries, 1);
+       __type(key, int);
+       __type(value, int);
+} parser_map SEC(".maps");
+
 bool test_sockmap = false; /* toggled by user-space */
 bool test_ingress = false; /* toggled by user-space */
 
 SEC("sk_skb/stream_parser")
 int prog_stream_parser(struct __sk_buff *skb)
 {
+       int *value;
+       __u32 key = 0;
+
+       value = bpf_map_lookup_elem(&parser_map, &key);
+       if (value && *value)
+               return *value;
+
        return skb->len;
 }
 
index 54d09b8..c037553 100644 (file)
@@ -4,10 +4,12 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/kernel.h>
+#include <linux/magic.h>
 #include <linux/mman.h>
 #include <sys/mman.h>
 #include <sys/shm.h>
 #include <sys/syscall.h>
+#include <sys/vfs.h>
 #include <unistd.h>
 #include <string.h>
 #include <fcntl.h>
@@ -15,6 +17,8 @@
 
 #include "../kselftest.h"
 
+#define NR_TESTS       9
+
 static const char * const dev_files[] = {
        "/dev/zero", "/dev/null", "/dev/urandom",
        "/proc/version", "/proc"
@@ -91,19 +95,33 @@ out:
 }
 
 /*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check whether the test file lives on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+       struct statfs statfs_buf;
+
+       if (fstatfs(fd, &statfs_buf))
+               return false;
+
+       return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
+/*
  * Open/create the file at filename, (optionally) write random data to it
  * (exactly num_pages), then test the cachestat syscall on this file.
  *
  * If test_fsync == true, fsync the file, then check the number of dirty
  * pages.
  */
-bool test_cachestat(const char *filename, bool write_random, bool create,
-               bool test_fsync, unsigned long num_pages, int open_flags,
-               mode_t open_mode)
+static int test_cachestat(const char *filename, bool write_random, bool create,
+                         bool test_fsync, unsigned long num_pages,
+                         int open_flags, mode_t open_mode)
 {
        size_t PS = sysconf(_SC_PAGESIZE);
        int filesize = num_pages * PS;
-       bool ret = true;
+       int ret = KSFT_PASS;
        long syscall_ret;
        struct cachestat cs;
        struct cachestat_range cs_range = { 0, filesize };
@@ -112,7 +130,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 
        if (fd == -1) {
                ksft_print_msg("Unable to create/open file.\n");
-               ret = false;
+               ret = KSFT_FAIL;
                goto out;
        } else {
                ksft_print_msg("Create/open %s\n", filename);
@@ -121,7 +139,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
        if (write_random) {
                if (!write_exactly(fd, filesize)) {
                        ksft_print_msg("Unable to access urandom.\n");
-                       ret = false;
+                       ret = KSFT_FAIL;
                        goto out1;
                }
        }
@@ -132,7 +150,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
 
        if (syscall_ret) {
                ksft_print_msg("Cachestat returned non-zero.\n");
-               ret = false;
+               ret = KSFT_FAIL;
                goto out1;
 
        } else {
@@ -142,15 +160,17 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
                        if (cs.nr_cache + cs.nr_evicted != num_pages) {
                                ksft_print_msg(
                                        "Total number of cached and evicted pages is off.\n");
-                               ret = false;
+                               ret = KSFT_FAIL;
                        }
                }
        }
 
        if (test_fsync) {
-               if (fsync(fd)) {
+               if (is_on_tmpfs(fd)) {
+                       ret = KSFT_SKIP;
+               } else if (fsync(fd)) {
                        ksft_print_msg("fsync fails.\n");
-                       ret = false;
+                       ret = KSFT_FAIL;
                } else {
                        syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
 
@@ -161,13 +181,13 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
                                print_cachestat(&cs);
 
                                if (cs.nr_dirty) {
-                                       ret = false;
+                                       ret = KSFT_FAIL;
                                        ksft_print_msg(
                                                "Number of dirty should be zero after fsync.\n");
                                }
                        } else {
                                ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
-                               ret = false;
+                               ret = KSFT_FAIL;
                                goto out1;
                        }
                }
@@ -236,13 +256,29 @@ out:
 
 int main(void)
 {
-       int ret = 0;
+       int ret;
+
+       ksft_print_header();
+
+       ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+       if (ret == -1 && errno == ENOSYS)
+               ksft_exit_skip("cachestat syscall not available\n");
+
+       ksft_set_plan(NR_TESTS);
+
+       if (ret == -1 && errno == EBADF) {
+               ksft_test_result_pass("bad file descriptor recognized\n");
+               ret = 0;
+       } else {
+               ksft_test_result_fail("bad file descriptor ignored\n");
+               ret = 1;
+       }
 
        for (int i = 0; i < 5; i++) {
                const char *dev_filename = dev_files[i];
 
                if (test_cachestat(dev_filename, false, false, false,
-                       4, O_RDONLY, 0400))
+                       4, O_RDONLY, 0400) == KSFT_PASS)
                        ksft_test_result_pass("cachestat works with %s\n", dev_filename);
                else {
                        ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
@@ -251,13 +287,27 @@ int main(void)
        }
 
        if (test_cachestat("tmpfilecachestat", true, true,
-               true, 4, O_CREAT | O_RDWR, 0400 | 0600))
+               false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
                ksft_test_result_pass("cachestat works with a normal file\n");
        else {
                ksft_test_result_fail("cachestat fails with normal file\n");
                ret = 1;
        }
 
+       switch (test_cachestat("tmpfilecachestat", true, true,
+               true, 4, O_CREAT | O_RDWR, 0600)) {
+       case KSFT_FAIL:
+               ksft_test_result_fail("cachestat fsync fails with normal file\n");
+               ret = KSFT_FAIL;
+               break;
+       case KSFT_PASS:
+               ksft_test_result_pass("cachestat fsync works with a normal file\n");
+               break;
+       case KSFT_SKIP:
+               ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+               break;
+       }
+
        if (test_cachestat_shmem())
                ksft_test_result_pass("cachestat works with a shmem file\n");
        else {
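
The probe at the top of main() doubles as a functional check: calling
cachestat with fd -1 yields ENOSYS on kernels without the syscall and
EBADF on kernels that have it. A standalone sketch of the same probe,
assuming __NR_cachestat is defined by the installed kernel headers:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
            /* fd -1 is invalid on purpose: ENOSYS means no syscall,
             * EBADF means the syscall exists and rejected the fd.
             */
            long ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);

            if (ret == -1 && errno == ENOSYS)
                    printf("cachestat not available\n");
            else if (ret == -1 && errno == EBADF)
                    printf("cachestat available\n");
            return 0;
    }
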
index 258ddc5..ed2e50b 100644 (file)
@@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root)
                goto cleanup;
 
        cg_write(cg, "memory.high", "1M");
+
+       /* wait for RCU freeing */
+       sleep(1);
+
        slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
-       if (slab1 <= 0)
+       if (slab1 < 0)
                goto cleanup;
 
        current = cg_read_long(cg, "memory.current");
-       if (current <= 0)
+       if (current < 0)
                goto cleanup;
 
        if (slab1 < slab0 / 2 && current < slab0 / 2)
index 03f92d7..8a72bb7 100644 (file)
@@ -9,10 +9,12 @@ TEST_PROGS := \
        mode-1-recovery-updelay.sh \
        mode-2-recovery-updelay.sh \
        bond_options.sh \
-       bond-eth-type-change.sh
+       bond-eth-type-change.sh \
+       bond_macvlan.sh
 
 TEST_FILES := \
        lag_lib.sh \
+       bond_topo_2d1c.sh \
        bond_topo_3d1c.sh \
        net_forwarding_lib.sh
 
index 47ab905..6358df5 100755 (executable)
@@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end
 
 # add ports
 ip link set fbond master fab-br0
-ip link set veth1-bond down master fbond
-ip link set veth2-bond down master fbond
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
 
 # bring up
 ip link set veth1-end up
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
new file mode 100755 (executable)
index 0000000..b609fb6
--- /dev/null
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan over balance-alb
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+m1_ns="m1-$(mktemp -u XXXXXX)"
+m2_ns="m1-$(mktemp -u XXXXXX)"
+m1_ip4="192.0.2.11"
+m1_ip6="2001:db8::11"
+m2_ip4="192.0.2.12"
+m2_ip6="2001:db8::12"
+
+cleanup()
+{
+       ip -n ${m1_ns} link del macv0
+       ip netns del ${m1_ns}
+       ip -n ${m2_ns} link del macv0
+       ip netns del ${m2_ns}
+
+       client_destroy
+       server_destroy
+       gateway_destroy
+}
+
+check_connection()
+{
+       local ns=${1}
+       local target=${2}
+       local message=${3:-"macvlan_over_bond"}
+       RET=0
+
+       ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+       check_err $? "ping failed"
+       log_test "$mode: $message"
+}
+
+macvlan_over_bond()
+{
+       local param="$1"
+       RET=0
+
+       # setup new bond mode
+       bond_reset "${param}"
+
+       ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+       ip -n ${s_ns} link set macv0 netns ${m1_ns}
+       ip -n ${m1_ns} link set dev macv0 up
+       ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
+       ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
+
+       ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+       ip -n ${s_ns} link set macv0 netns ${m2_ns}
+       ip -n ${m2_ns} link set dev macv0 up
+       ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
+       ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
+
+       sleep 2
+
+       check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+       check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+       check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
+       check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
+       check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
+       check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
+       check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
+       check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
+
+       sleep 5
+
+       check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+       check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+       check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
+       check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
+       check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
+       check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
+       check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_1"
+       check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_1"
+
+       ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${m1_ns}
+ip netns add ${m2_ns}
+
+modes="active-backup balance-tlb balance-alb"
+
+for mode in $modes; do
+       macvlan_over_bond "mode $mode"
+done
+
+exit $EXIT_STATUS
index 607ba5c..c54d169 100755 (executable)
@@ -9,10 +9,7 @@ ALL_TESTS="
        num_grat_arp
 "
 
-REQUIRE_MZ=no
-NUM_NETIFS=0
 lib_dir=$(dirname "$0")
-source ${lib_dir}/net_forwarding_lib.sh
 source ${lib_dir}/bond_topo_3d1c.sh
 
 skip_prio()
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644 (file)
index 0000000..a509ef9
--- /dev/null
@@ -0,0 +1,158 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+#  +-------------------------+
+#  |          bond0          |  Server
+#  |            +            |  192.0.2.1/24
+#  |      eth0  |  eth1      |  2001:db8::1/24
+#  |        +---+---+        |
+#  |        |       |        |
+#  +-------------------------+
+#           |       |
+#  +-------------------------+
+#  |        |       |        |
+#  |    +---+-------+---+    |  Gateway
+#  |    |      br0      |    |  192.0.2.254/24
+#  |    +-------+-------+    |  2001:db8::254/24
+#  |            |            |
+#  +-------------------------+
+#               |
+#  +-------------------------+
+#  |            |            |  Client
+#  |            +            |  192.0.2.10/24
+#  |          eth0           |  2001:db8::10/24
+#  +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source ${lib_dir}/net_forwarding_lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+       ip netns add ${g_ns}
+       ip -n ${g_ns} link add br0 type bridge
+       ip -n ${g_ns} link set br0 up
+       ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+       ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+       ip -n ${g_ns} link del br0
+       ip netns del ${g_ns}
+}
+
+server_create()
+{
+       ip netns add ${s_ns}
+       ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+       for i in $(seq 0 1); do
+               ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+               ip -n ${g_ns} link set s${i} up
+               ip -n ${g_ns} link set s${i} master br0
+               ip -n ${s_ns} link set eth${i} master bond0
+
+               tc -n ${g_ns} qdisc add dev s${i} clsact
+       done
+
+       ip -n ${s_ns} link set bond0 up
+       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+       sleep 2
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+       # Count the eth link number in real-time as this function
+       # may be called from other topologies.
+       local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+       local param="$1"
+       link_num=$((link_num -1))
+
+       ip -n ${s_ns} link set bond0 down
+       ip -n ${s_ns} link del bond0
+
+       ip -n ${s_ns} link add bond0 type bond $param
+       for i in $(seq 0 ${link_num}); do
+               ip -n ${s_ns} link set eth$i master bond0
+       done
+
+       ip -n ${s_ns} link set bond0 up
+       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+       sleep 2
+}
+
+server_destroy()
+{
+       # Count the eth link number in real-time as this function
+       # may be called from other topologies.
+       local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+       link_num=$((link_num -1))
+       for i in $(seq 0 ${link_num}); do
+               ip -n ${s_ns} link del eth${i}
+       done
+       ip netns del ${s_ns}
+}
+
+client_create()
+{
+       ip netns add ${c_ns}
+       ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+       ip -n ${g_ns} link set c0 up
+       ip -n ${g_ns} link set c0 master br0
+
+       ip -n ${c_ns} link set eth0 up
+       ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+       ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+       ip -n ${c_ns} link del eth0
+       ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+       gateway_create
+       server_create
+       client_create
+}
+
+cleanup()
+{
+       pre_cleanup
+
+       client_destroy
+       server_destroy
+       gateway_destroy
+}
+
+bond_check_connection()
+{
+       local msg=${1:-"check connection"}
+
+       sleep 2
+       ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+       check_err $? "${msg}: ping failed"
+       ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+       check_err $? "${msg}: ping6 failed"
+}
index 69ab99a..3a1333d 100644 (file)
 #  |                eth0                 |  2001:db8::10/24
 #  +-------------------------------------+
 
-s_ns="s-$(mktemp -u XXXXXX)"
-c_ns="c-$(mktemp -u XXXXXX)"
-g_ns="g-$(mktemp -u XXXXXX)"
-s_ip4="192.0.2.1"
-c_ip4="192.0.2.10"
-g_ip4="192.0.2.254"
-s_ip6="2001:db8::1"
-c_ip6="2001:db8::10"
-g_ip6="2001:db8::254"
-
-gateway_create()
-{
-       ip netns add ${g_ns}
-       ip -n ${g_ns} link add br0 type bridge
-       ip -n ${g_ns} link set br0 up
-       ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
-       ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
-}
-
-gateway_destroy()
-{
-       ip -n ${g_ns} link del br0
-       ip netns del ${g_ns}
-}
-
-server_create()
-{
-       ip netns add ${s_ns}
-       ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
-
-       for i in $(seq 0 2); do
-               ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
-
-               ip -n ${g_ns} link set s${i} up
-               ip -n ${g_ns} link set s${i} master br0
-               ip -n ${s_ns} link set eth${i} master bond0
-
-               tc -n ${g_ns} qdisc add dev s${i} clsact
-       done
-
-       ip -n ${s_ns} link set bond0 up
-       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
-       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
-       sleep 2
-}
-
-# Reset bond with new mode and options
-bond_reset()
-{
-       local param="$1"
-
-       ip -n ${s_ns} link set bond0 down
-       ip -n ${s_ns} link del bond0
-
-       ip -n ${s_ns} link add bond0 type bond $param
-       for i in $(seq 0 2); do
-               ip -n ${s_ns} link set eth$i master bond0
-       done
-
-       ip -n ${s_ns} link set bond0 up
-       ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
-       ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
-       sleep 2
-}
-
-server_destroy()
-{
-       for i in $(seq 0 2); do
-               ip -n ${s_ns} link del eth${i}
-       done
-       ip netns del ${s_ns}
-}
-
-client_create()
-{
-       ip netns add ${c_ns}
-       ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
-
-       ip -n ${g_ns} link set c0 up
-       ip -n ${g_ns} link set c0 master br0
-
-       ip -n ${c_ns} link set eth0 up
-       ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
-       ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
-}
-
-client_destroy()
-{
-       ip -n ${c_ns} link del eth0
-       ip netns del ${c_ns}
-}
+source bond_topo_2d1c.sh
 
 setup_prepare()
 {
        gateway_create
        server_create
        client_create
-}
-
-cleanup()
-{
-       pre_cleanup
-
-       client_destroy
-       server_destroy
-       gateway_destroy
-}
-
-bond_check_connection()
-{
-       local msg=${1:-"check connection"}
 
-       sleep 2
-       ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
-       check_err $? "${msg}: ping failed"
-       ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
-       check_err $? "${msg}: ping6 failed"
+       # Add the extra device, as this topology uses 3 downlinks for bond0
+       local i=2
+       ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+       ip -n ${g_ns} link set s${i} up
+       ip -n ${g_ns} link set s${i} master br0
+       ip -n ${s_ns} link set eth${i} master bond0
+       tc -n ${g_ns} qdisc add dev s${i} clsact
 }
index 7d9e73a..0c47faf 100755 (executable)
@@ -98,12 +98,12 @@ sb_occ_etc_check()
 
 port_pool_test()
 {
-       local exp_max_occ=288
+       local exp_max_occ=$(devlink_cell_size_get)
        local max_occ
 
        devlink sb occupancy clearmax $DEVLINK_DEV
 
-       $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+       $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
                -t ip -q
 
        devlink sb occupancy snapshot $DEVLINK_DEV
@@ -126,12 +126,12 @@ port_pool_test()
 
 port_tc_ip_test()
 {
-       local exp_max_occ=288
+       local exp_max_occ=$(devlink_cell_size_get)
        local max_occ
 
        devlink sb occupancy clearmax $DEVLINK_DEV
 
-       $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+       $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
                -t ip -q
 
        devlink sb occupancy snapshot $DEVLINK_DEV
@@ -154,16 +154,12 @@ port_tc_ip_test()
 
 port_tc_arp_test()
 {
-       local exp_max_occ=96
+       local exp_max_occ=$(devlink_cell_size_get)
        local max_occ
 
-       if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
-               exp_max_occ=144
-       fi
-
        devlink sb occupancy clearmax $DEVLINK_DEV
 
-       $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+       $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
 
        devlink sb occupancy snapshot $DEVLINK_DEV
 
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
new file mode 100644 (file)
index 0000000..20839f8
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
+TEST_GEN_PROGS := fchmodat2_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
new file mode 100644 (file)
index 0000000..e031941
--- /dev/null
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
+{
+       int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags);
+
+       return ret >= 0 ? ret : -errno;
+}
+
+int setup_testdir(void)
+{
+       int dfd, ret;
+       char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX";
+
+       /* Make the top-level directory. */
+       if (!mkdtemp(dirname))
+               ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__);
+
+       dfd = open(dirname, O_PATH | O_DIRECTORY);
+       if (dfd < 0)
+               ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__);
+
+       ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
+       if (ret < 0)
+               ksft_exit_fail_msg("%s: failed to create file in tmpdir\n",
+                               __func__);
+       close(ret);
+
+       ret = symlinkat("regfile", dfd, "symlink");
+       if (ret < 0)
+               ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n",
+                               __func__);
+
+       return dfd;
+}
+
+int expect_mode(int dfd, const char *filename, mode_t expect_mode)
+{
+       struct stat st;
+       int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW);
+
+       if (ret)
+               ksft_exit_fail_msg("%s: %s: fstatat failed\n",
+                               __func__, filename);
+
+       return (st.st_mode == expect_mode);
+}
+
+void test_regfile(void)
+{
+       int dfd, ret;
+
+       dfd = setup_testdir();
+
+       ret = sys_fchmodat2(dfd, "regfile", 0640, 0);
+
+       if (ret < 0)
+               ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+       if (!expect_mode(dfd, "regfile", 0100640))
+               ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+                               __func__);
+
+       ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW);
+
+       if (ret < 0)
+               ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n",
+                               __func__);
+
+       if (!expect_mode(dfd, "regfile", 0100600))
+               ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+                               __func__);
+
+       ksft_test_result_pass("fchmodat2(regfile)\n");
+}
+
+void test_symlink(void)
+{
+       int dfd, ret;
+
+       dfd = setup_testdir();
+
+       ret = sys_fchmodat2(dfd, "symlink", 0640, 0);
+
+       if (ret < 0)
+               ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+       if (!expect_mode(dfd, "regfile", 0100640))
+               ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+                               __func__);
+
+       if (!expect_mode(dfd, "symlink", 0120777))
+               ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n",
+                               __func__);
+
+       ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW);
+
+       /*
+        * On certain filesystems (xfs or btrfs), the chmod operation fails on
+        * symlinks. So we only check the symlink mode when the operation
+        * succeeded, and mark the test as skipped when it failed.
+        *
+        * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html
+        */
+       if (ret == 0 && !expect_mode(dfd, "symlink", 0120600))
+               ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n",
+                               __func__);
+
+       if (!expect_mode(dfd, "regfile", 0100640))
+               ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+                               __func__);
+
+       if (ret != 0)
+               ksft_test_result_skip("fchmodat2(symlink)\n");
+       else
+               ksft_test_result_pass("fchmodat2(symlink)\n");
+}
+
+#define NUM_TESTS 2
+
+int main(int argc, char **argv)
+{
+       ksft_print_header();
+       ksft_set_plan(NUM_TESTS);
+
+       test_regfile();
+       test_symlink();
+
+       if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+               ksft_exit_fail();
+       else
+               ksft_exit_pass();
+}
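
The test calls the raw syscall because glibc does not (yet) ship an
fchmodat2() wrapper. A minimal standalone sketch of the same pattern;
the target path is illustrative and __NR_fchmodat2 must come from
sufficiently new kernel headers:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <syscall.h>
    #include <unistd.h>

    int main(void)
    {
            /* chmod a path relative to the CWD without following a
             * trailing symlink -- the case plain fchmodat() cannot do.
             */
            long ret = syscall(__NR_fchmodat2, AT_FDCWD, "some-file",
                               0600, AT_SYMLINK_NOFOLLOW);

            if (ret < 0)
                    perror("fchmodat2");
            return ret < 0 ? 1 : 0;
    }
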
diff --git a/tools/testing/selftests/filelock/Makefile b/tools/testing/selftests/filelock/Makefile
new file mode 100644 (file)
index 0000000..478e82f
--- /dev/null
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := ofdlocks
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
new file mode 100644 (file)
index 0000000..a55b798
--- /dev/null
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include "../kselftest.h"
+
+static int lock_set(int fd, struct flock *fl)
+{
+       int ret;
+
+       fl->l_pid = 0;          // needed for OFD locks
+       fl->l_whence = SEEK_SET;
+       ret = fcntl(fd, F_OFD_SETLK, fl);
+       if (ret)
+               perror("fcntl()");
+       return ret;
+}
+
+static int lock_get(int fd, struct flock *fl)
+{
+       int ret;
+
+       fl->l_pid = 0;          // needed for OFD locks
+       fl->l_whence = SEEK_SET;
+       ret = fcntl(fd, F_OFD_GETLK, fl);
+       if (ret)
+               perror("fcntl()");
+       return ret;
+}
+
+int main(void)
+{
+       int rc;
+       struct flock fl, fl2;
+       int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600);
+       int fd2 = open("/tmp/aa", O_RDONLY);
+
+       unlink("/tmp/aa");
+       assert(fd != -1);
+       assert(fd2 != -1);
+       ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2);
+
+       /* Set some read lock */
+       fl.l_type = F_RDLCK;
+       fl.l_start = 5;
+       fl.l_len = 3;
+       rc = lock_set(fd, &fl);
+       if (rc == 0) {
+               ksft_print_msg
+                   ("[SUCCESS] set OFD read lock on first fd\n");
+       } else {
+               ksft_print_msg("[FAIL] to set OFD read lock on first fd\n");
+               return -1;
+       }
+       /* Make sure read locks do not conflict on different fds. */
+       fl.l_type = F_RDLCK;
+       fl.l_start = 5;
+       fl.l_len = 1;
+       rc = lock_get(fd2, &fl);
+       if (rc != 0)
+               return -1;
+       if (fl.l_type != F_UNLCK) {
+               ksft_print_msg("[FAIL] read locks conflicted\n");
+               return -1;
+       }
+       /* Make sure read/write locks do conflict on different fds. */
+       fl.l_type = F_WRLCK;
+       fl.l_start = 5;
+       fl.l_len = 1;
+       rc = lock_get(fd2, &fl);
+       if (rc != 0)
+               return -1;
+       if (fl.l_type != F_UNLCK) {
+               ksft_print_msg
+                   ("[SUCCESS] read and write locks conflicted\n");
+       } else {
+               ksft_print_msg
+                   ("[SUCCESS] read and write locks not conflicted\n");
+               return -1;
+       }
+       /* Get info about the lock on first fd. */
+       fl.l_type = F_UNLCK;
+       fl.l_start = 5;
+       fl.l_len = 1;
+       rc = lock_get(fd, &fl);
+       if (rc != 0) {
+               ksft_print_msg
+                   ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+               return -1;
+       }
+       if (fl.l_type != F_UNLCK) {
+               ksft_print_msg
+                   ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+                    fl.l_type, fl.l_pid, fl.l_len);
+       } else {
+               ksft_print_msg
+                   ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n");
+               return -1;
+       }
+       /* Try the same but by locking everything by len==0. */
+       fl2.l_type = F_UNLCK;
+       fl2.l_start = 0;
+       fl2.l_len = 0;
+       rc = lock_get(fd, &fl2);
+       if (rc != 0) {
+               ksft_print_msg
+                   ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+               return -1;
+       }
+       if (memcmp(&fl, &fl2, sizeof(fl))) {
+               ksft_print_msg
+                   ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+                    fl.l_type, fl.l_pid, fl.l_len);
+               return -1;
+       }
+       ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n");
+       /* Get info about the lock on second fd - no locks on it. */
+       fl.l_type = F_UNLCK;
+       fl.l_start = 0;
+       fl.l_len = 0;
+       lock_get(fd2, &fl);
+       if (fl.l_type != F_UNLCK) {
+               ksft_print_msg
+                   ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
+               return -1;
+       }
+       return 0;
+}
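
The property this test leans on is that OFD locks belong to the open
file description rather than the process, so two independent open()
calls can conflict inside a single process. A hedged standalone sketch
(the demo path is illustrative):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            struct flock fl;
            int fd1 = open("/tmp/ofd-demo", O_RDWR | O_CREAT, 0600);
            int fd2 = open("/tmp/ofd-demo", O_RDWR);

            memset(&fl, 0, sizeof(fl));
            fl.l_type = F_WRLCK;            /* whole-file write lock */
            fl.l_whence = SEEK_SET;
            fl.l_pid = 0;                   /* must be 0 for OFD locks */
            fcntl(fd1, F_OFD_SETLK, &fl);

            fl.l_type = F_WRLCK;            /* probe from the 2nd description */
            fcntl(fd2, F_OFD_GETLK, &fl);
            /* F_UNLCK back means no conflict; anything else reports one */
            printf("conflict: %s\n", fl.l_type == F_UNLCK ? "no" : "yes");
            return 0;
    }
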
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644 (file)
index 0000000..63b76cf
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is contrived to reproduce a problem where the cpu buffers
+# become unavailable, which is due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store the original cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing on all cpus
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore the original cpumask; note that some cpus should now be traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log was written, i.e. the cpu buffers are still available
+grep -q "test input 1" trace
+exit 0
index b89de17..f34b14e 100644 (file)
@@ -13,7 +13,7 @@ if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; the
   FPROBES=yes
 fi
 
-if [ -z "$KPROBES" -a "$FPROBES" ] ; then
+if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then
   exit_unsupported
 fi
 
index 681b906..4da48bf 100755 (executable)
@@ -79,6 +79,7 @@ recompile_kernel()
        cd "${kernel_checkout}"
 
        ${make_command} olddefconfig
+       ${make_command} headers
        ${make_command}
 }
 
index 5fd49ad..e05ac82 100644 (file)
@@ -938,7 +938,11 @@ void __wait_for_test(struct __test_metadata *t)
                fprintf(TH_LOG_STREAM,
                        "# %s: Test terminated by timeout\n", t->name);
        } else if (WIFEXITED(status)) {
-               if (t->termsig != -1) {
+               if (WEXITSTATUS(status) == 255) {
+                       /* SKIP */
+                       t->passed = 1;
+                       t->skip = 1;
+               } else if (t->termsig != -1) {
                        t->passed = 0;
                        fprintf(TH_LOG_STREAM,
                                "# %s: Test exited normally instead of by signal (code: %d)\n",
@@ -950,11 +954,6 @@ void __wait_for_test(struct __test_metadata *t)
                        case 0:
                                t->passed = 1;
                                break;
-                       /* SKIP */
-                       case 255:
-                               t->passed = 1;
-                               t->skip = 1;
-                               break;
                        /* Other failure, assume step report. */
                        default:
                                t->passed = 0;
index 07732a1..eb1ff59 100644 (file)
@@ -362,8 +362,10 @@ static inline void read_stats_header(int stats_fd, struct kvm_stats_header *head
 {
        ssize_t ret;
 
-       ret = read(stats_fd, header, sizeof(*header));
-       TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+       ret = pread(stats_fd, header, sizeof(*header), 0);
+       TEST_ASSERT(ret == sizeof(*header),
+                   "Failed to read '%lu' header bytes, ret = '%ld'",
+                   sizeof(*header), ret);
 }
 
 struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
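
Switching to pread() pins the header read to offset 0 instead of the
fd's current file position, so the helper stays correct when the same
stats fd is read more than once or through a dup(). A generic sketch of
the pattern (illustrative, not KVM-specific):

    #include <unistd.h>

    /* Read a fixed-size header from the start of the file regardless
     * of where previous reads left the file position.
     */
    static int read_header_at_start(int fd, void *hdr, size_t len)
    {
            ssize_t ret = pread(fd, hdr, len, 0);

            return ret == (ssize_t)len ? 0 : -1;
    }
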
index a7001e2..698c1cf 100644 (file)
@@ -43,8 +43,10 @@ static void stats_test(int stats_fd)
        id = malloc(header.name_size);
        TEST_ASSERT(id, "Allocate memory for id string");
 
-       ret = read(stats_fd, id, header.name_size);
-       TEST_ASSERT(ret == header.name_size, "Read id string");
+       ret = pread(stats_fd, id, header.name_size, sizeof(header));
+       TEST_ASSERT(ret == header.name_size,
+                   "Expected header size '%u', read '%lu' bytes",
+                   header.name_size, ret);
 
        /* Check id string, that should start with "kvm" */
        TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
@@ -165,23 +167,7 @@ static void stats_test(int stats_fd)
        free(stats_data);
        free(stats_desc);
        free(id);
-}
-
-
-static void vm_stats_test(struct kvm_vm *vm)
-{
-       int stats_fd = vm_get_stats_fd(vm);
-
-       stats_test(stats_fd);
-       close(stats_fd);
-       TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
-}
-
-static void vcpu_stats_test(struct kvm_vcpu *vcpu)
-{
-       int stats_fd = vcpu_get_stats_fd(vcpu);
 
-       stats_test(stats_fd);
        close(stats_fd);
        TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
 }
@@ -199,6 +185,7 @@ static void vcpu_stats_test(struct kvm_vcpu *vcpu)
 
 int main(int argc, char *argv[])
 {
+       int vm_stats_fds, *vcpu_stats_fds;
        int i, j;
        struct kvm_vcpu **vcpus;
        struct kvm_vm **vms;
@@ -231,23 +218,58 @@ int main(int argc, char *argv[])
        vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
        TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
 
+       /*
+        * Not per-VM as the array is populated, used, and invalidated within a
+        * single for-loop iteration.
+        */
+       vcpu_stats_fds = calloc(max_vcpu, sizeof(*vcpu_stats_fds));
+       TEST_ASSERT(vcpu_stats_fds, "Allocate memory for vCPU stats fds");
+
        for (i = 0; i < max_vm; ++i) {
                vms[i] = vm_create_barebones();
                for (j = 0; j < max_vcpu; ++j)
                        vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
        }
 
-       /* Check stats read for every VM and VCPU */
+       /*
+        * Check stats read for every VM and vCPU, with a variety of flavors.
+        * Note, stats_test() closes the passed in stats fd.
+        */
        for (i = 0; i < max_vm; ++i) {
-               vm_stats_test(vms[i]);
+               /*
+                * Verify that creating multiple userspace references to a
+                * single stats file works and doesn't cause explosions.
+                */
+               vm_stats_fds = vm_get_stats_fd(vms[i]);
+               stats_test(dup(vm_stats_fds));
+
+               /* Verify userspace can instantiate multiple stats files. */
+               stats_test(vm_get_stats_fd(vms[i]));
+
+               for (j = 0; j < max_vcpu; ++j) {
+                       vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
+                       stats_test(dup(vcpu_stats_fds[j]));
+                       stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
+               }
+
+               /*
+                * Close the VM fd and redo the stats tests.  KVM should gift a
+                * reference (to the VM) to each stats fd, i.e. stats should
+                * still be accessible even after userspace has put its last
+                * _direct_ reference to the VM.
+                */
+               kvm_vm_free(vms[i]);
+
+               stats_test(vm_stats_fds);
                for (j = 0; j < max_vcpu; ++j)
-                       vcpu_stats_test(vcpus[i * max_vcpu + j]);
+                       stats_test(vcpu_stats_fds[j]);
+
                ksft_test_result_pass("vm%i\n", i);
        }
 
-       for (i = 0; i < max_vm; ++i)
-               kvm_vm_free(vms[i]);
        free(vms);
+       free(vcpus);
+       free(vcpu_stats_fds);
 
        ksft_finished();        /* Print results and exit() accordingly */
 }
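
The reworked loop asserts that a stats fd pins the VM after the VM fd
itself is gone. The underlying file-reference behaviour can be sketched
with plain POSIX descriptors (hedged illustration, not KVM code):

    #include <assert.h>
    #include <fcntl.h>
    #include <unistd.h>

    int main(void)
    {
            char c;
            int fd = open("/dev/zero", O_RDONLY);
            int dup_fd = dup(fd);

            close(fd);                              /* drop the direct reference */
            assert(read(dup_fd, &c, 1) == 1);       /* the dup keeps it alive */
            assert(fcntl(fd, F_GETFD) == -1);       /* original fd is really gone */
            close(dup_fd);
            return 0;
    }
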
index a284fce..3610981 100644 (file)
 #include "kvm_util.h"
 #include "processor.h"
 
-static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig,
-                                uint64_t feature_bit)
-{
-       struct kvm_sregs sregs;
-       int rc;
-
-       /* Skip the sub-test, the feature is supported. */
-       if (orig->cr4 & feature_bit)
-               return;
-
-       memcpy(&sregs, orig, sizeof(sregs));
-       sregs.cr4 |= feature_bit;
-
-       rc = _vcpu_sregs_set(vcpu, &sregs);
-       TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
-
-       /* Sanity check that KVM didn't change anything. */
-       vcpu_sregs_get(vcpu, &sregs);
-       TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
-}
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit)                               \
+do {                                                                           \
+       struct kvm_sregs new;                                                   \
+       int rc;                                                                 \
+                                                                               \
+       /* Skip the sub-test, the feature/bit is supported. */                  \
+       if (orig.cr & bit)                                                      \
+               break;                                                          \
+                                                                               \
+       memcpy(&new, &orig, sizeof(sregs));                                     \
+       new.cr |= bit;                                                          \
+                                                                               \
+       rc = _vcpu_sregs_set(vcpu, &new);                                       \
+       TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit);        \
+                                                                               \
+       /* Sanity check that KVM didn't change anything. */                     \
+       vcpu_sregs_get(vcpu, &new);                                             \
+       TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs");   \
+} while (0)
 
 static uint64_t calc_supported_cr4_feature_bits(void)
 {
@@ -80,7 +79,7 @@ int main(int argc, char *argv[])
        struct kvm_vcpu *vcpu;
        struct kvm_vm *vm;
        uint64_t cr4;
-       int rc;
+       int rc, i;
 
        /*
         * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
@@ -92,6 +91,7 @@ int main(int argc, char *argv[])
 
        vcpu_sregs_get(vcpu, &sregs);
 
+       sregs.cr0 = 0;
        sregs.cr4 |= calc_supported_cr4_feature_bits();
        cr4 = sregs.cr4;
 
@@ -103,16 +103,24 @@ int main(int argc, char *argv[])
                    sregs.cr4, cr4);
 
        /* Verify all unsupported features are rejected by KVM. */
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP);
-       test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+       TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+       for (i = 32; i < 64; i++)
+               TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+       /* NW without CD is illegal, as is PG without PE. */
+       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+       TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
        kvm_vm_free(vm);
 
        /* Create a "real" VM and verify APIC_BASE can be set. */
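
Turning the helper into a macro lets one do/while(0) body target either
control register, because the cr argument is pasted in as a struct
member name. A mini-illustration of the technique outside KVM (all
names hypothetical):

    #include <stdio.h>

    struct regs { unsigned long cr0, cr4; };

    /* cr is a member name, not a value, so the same body can set bits
     * in either register field.
     */
    #define SET_CR_BIT(state, cr, bit)      \
    do {                                    \
            (state).cr |= (bit);            \
    } while (0)

    int main(void)
    {
            struct regs r = { 0, 0 };

            SET_CR_BIT(r, cr0, 1UL << 0);
            SET_CR_BIT(r, cr4, 1UL << 5);
            printf("cr0=%#lx cr4=%#lx\n", r.cr0, r.cr4);
            return 0;
    }
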
index 4c88238..e949a43 100644 (file)
@@ -150,8 +150,8 @@ TEST(check_huge_pages)
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
                -1, 0);
        if (addr == MAP_FAILED) {
-               if (errno == ENOMEM)
-                       SKIP(return, "No huge pages available.");
+               if (errno == ENOMEM || errno == EINVAL)
+                       SKIP(return, "No huge pages available or CONFIG_HUGETLB_PAGE disabled.");
                else
                        TH_LOG("mmap error: %s", strerror(errno));
        }
old mode 100644 (file)
new mode 100755 (executable)
index 4adaad1..2029455 100644 (file)
@@ -57,9 +57,14 @@ enum {
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
 /* Just the flags we need, copied from mm.h: */
+
+#ifndef FOLL_WRITE
 #define FOLL_WRITE     0x01    /* check pte is writable */
-#define FOLL_LONGTERM   0x10000 /* mapping lifetime is indefinite */
+#endif
 
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM   0x100 /* mapping lifetime is indefinite */
+#endif
 FIXTURE(hmm)
 {
        int             fd;
index 435aceb..380b691 100644 (file)
@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
                                printf("Size must be greater than 0\n");
                                return KSFT_FAIL;
                        }
+                       break;
                case 't':
                        {
                                int tmp = atoi(optarg);
index 6d71d97..301abb9 100644 (file)
@@ -321,8 +321,8 @@ close_uffd:
 munmap:
        munmap(dst, pagesize);
        free(src);
-#endif /* __NR_userfaultfd */
 }
+#endif /* __NR_userfaultfd */
 
 int main(void)
 {
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 501854a..2f9d378 100644 (file)
@@ -15,6 +15,7 @@ ip_local_port_range
 ipsec
 ipv6_flowlabel
 ipv6_flowlabel_mgr
+log.txt
 msg_zerocopy
 nettest
 psock_fanout
@@ -45,6 +46,7 @@ test_unix_oob
 timestamping
 tls
 toeplitz
+tools
 tun
 txring_overwrite
 txtimestamp
index 0f5e88c..df8d90b 100755 (executable)
@@ -1981,6 +1981,11 @@ basic()
 
        run_cmd "$IP link set dev lo up"
 
+       # Dump should not loop endlessly when maximum nexthop ID is configured.
+       run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+       run_cmd "timeout 5 $IP nexthop"
+       log_test $? 0 "Maximum nexthop ID dump"
+
        #
        # groups
        #
@@ -2201,6 +2206,11 @@ basic_res()
        run_cmd "$IP nexthop bucket list fdb"
        log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
 
+       # Dump should not loop endlessly when maximum nexthop ID is configured.
+       run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+       run_cmd "timeout 5 $IP nexthop bucket"
+       log_test $? 0 "Maximum nexthop ID dump"
+
        #
        # resilient nexthop buckets get requests
        #
index ae3f946..d0c6c49 100755 (executable)
@@ -617,7 +617,7 @@ __cfg_test_port_ip_sg()
                grep -q "permanent"
        check_err $? "Entry not added as \"permanent\" when should"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_err $? "\"permanent\" entry has a pending group timer"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -626,7 +626,7 @@ __cfg_test_port_ip_sg()
                grep -q "temp"
        check_err $? "Entry not added as \"temp\" when should"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_fail $? "\"temp\" entry has an unpending group timer"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -659,7 +659,7 @@ __cfg_test_port_ip_sg()
                grep -q "permanent"
        check_err $? "Entry not marked as \"permanent\" after replace"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_err $? "Entry has a pending group timer after replace"
 
        bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
@@ -667,7 +667,7 @@ __cfg_test_port_ip_sg()
                grep -q "temp"
        check_err $? "Entry not marked as \"temp\" after replace"
        bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
-               grep -q "0.00"
+               grep -q " 0.00"
        check_fail $? "Entry has an unpending group timer after replace"
        bridge mdb del dev br0 port $swp1 $grp_key vid 10
 
@@ -850,6 +850,7 @@ cfg_test()
 __fwd_test_host_ip()
 {
        local grp=$1; shift
+       local dmac=$1; shift
        local src=$1; shift
        local mode=$1; shift
        local name
@@ -872,27 +873,27 @@ __fwd_test_host_ip()
        # Packet should only be flooded to multicast router ports when there is
        # no matching MDB entry. The bridge is not configured as a multicast
        # router port.
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 0
        check_err $? "Packet locally received after flood"
 
        # Install a regular port group entry and expect the packet to not be
        # locally received.
        bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 0
        check_err $? "Packet locally received after installing a regular entry"
 
        # Add a host entry and expect the packet to be locally received.
        bridge mdb add dev br0 port br0 grp $grp temp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 1
        check_err $? "Packet not locally received after adding a host entry"
 
        # Remove the host entry and expect the packet to not be locally
        # received.
        bridge mdb del dev br0 port br0 grp $grp vid 10
-       $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
        tc_check_packets "dev br0 ingress" 1 1
        check_err $? "Packet locally received after removing a host entry"
 
@@ -905,8 +906,8 @@ __fwd_test_host_ip()
 
 fwd_test_host_ip()
 {
-       __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4"
-       __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6"
+       __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+       __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
 }
 
 fwd_test_host_l2()
@@ -966,6 +967,7 @@ fwd_test_host()
 __fwd_test_port_ip()
 {
        local grp=$1; shift
+       local dmac=$1; shift
        local valid_src=$1; shift
        local invalid_src=$1; shift
        local mode=$1; shift
@@ -999,43 +1001,43 @@ __fwd_test_port_ip()
                vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
                src_ip $invalid_src action drop
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 0
        check_err $? "Packet from valid source received on H2 before adding entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 0
        check_err $? "Packet from invalid source received on H2 before adding entry"
 
        bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
                filter_mode $filter_mode source_list $src_list
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 1
        check_err $? "Packet from valid source not received on H2 after adding entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 0
        check_err $? "Packet from invalid source received on H2 after adding entry"
 
        bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
                filter_mode exclude
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 2
        check_err $? "Packet from valid source not received on H2 after allowing all sources"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 1
        check_err $? "Packet from invalid source not received on H2 after allowing all sources"
 
        bridge mdb del dev br0 port $swp2 grp $grp vid 10
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 1 2
        check_err $? "Packet from valid source received on H2 after deleting entry"
 
-       $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+       $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
        tc_check_packets "dev $h2 ingress" 2 1
        check_err $? "Packet from invalid source received on H2 after deleting entry"
 
@@ -1047,11 +1049,11 @@ __fwd_test_port_ip()
 
 fwd_test_port_ip()
 {
-       __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude"
-       __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+       __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+       __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
                "exclude"
-       __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include"
-       __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+       __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+       __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
                "include"
 }
 
@@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test()
                filter_mode include source_list 192.0.2.1
 
        # IS_IN ( 192.0.2.2 )
-       $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+       $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
                -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
        bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
@@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test()
                filter_mode include source_list 192.0.2.1
 
        # IS_IN ( 192.0.2.2 )
-       $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+       $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
                -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
 
        bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
@@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test()
 
        # IS_IN ( 2001:db8:1::2 )
        local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
-       $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+       $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
                -t ip hop=1,next=0,p="$p" -q
 
        bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
@@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test()
                filter_mode include source_list 2001:db8:1::1
 
        # IS_IN ( 2001:db8:1::2 )
-       $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+       $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
                -t ip hop=1,next=0,p="$p" -q
 
        bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
@@ -1206,6 +1208,11 @@ ctrl_test()
        ctrl_mldv2_is_in_test
 }
 
+if ! bridge mdb help 2>&1 | grep -q "replace"; then
+       echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+       exit $ksft_skip
+fi
+
 trap cleanup EXIT
 
 setup_prepare
index ae255b6..3da9d93 100755 (executable)
@@ -252,7 +252,8 @@ ctl4_entries_add()
        local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
        local peer=$(locus_dev_peer $locus)
        local GRP=239.1.1.${grp}
-       $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+       local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+       $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
                -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
        sleep 1
 
@@ -272,7 +273,8 @@ ctl4_entries_del()
 
        local peer=$(locus_dev_peer $locus)
        local GRP=239.1.1.${grp}
-       $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+       local dmac=01:00:5e:00:00:02
+       $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
                -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
        sleep 1
        ! bridge mdb show dev br0 | grep -q $GRP
@@ -289,8 +291,10 @@ ctl6_entries_add()
        local peer=$(locus_dev_peer $locus)
        local SIP=fe80::1
        local GRP=ff0e::${grp}
+       local dmac=33:33:00:00:00:$(printf "%02x" $grp)
        local p=$(mldv2_is_in_get $SIP $GRP $IPs)
-       $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+       $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+               -t ip hop=1,next=0,p="$p" -q
        sleep 1
 
        local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
@@ -310,8 +314,10 @@ ctl6_entries_del()
        local peer=$(locus_dev_peer $locus)
        local SIP=fe80::1
        local GRP=ff0e::${grp}
+       local dmac=33:33:00:00:00:$(printf "%02x" $grp)
        local p=$(mldv1_done_get $SIP $GRP)
-       $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+       $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+               -t ip hop=1,next=0,p="$p" -q
        sleep 1
        ! bridge mdb show dev br0 | grep -q $GRP
 }
@@ -1328,6 +1334,11 @@ test_8021qvs()
        switch_destroy
 }
 
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+       echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+       exit $ksft_skip
+fi
+
 trap cleanup EXIT
 
 setup_prepare
index dbb9fcf..aa2eafb 100755 (executable)
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
        ethtool -s $h1 autoneg on
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
index c580ad6..39e736f 100755 (executable)
@@ -258,11 +258,6 @@ h2_destroy()
 
 setup_prepare()
 {
-       check_ethtool_mm_support
-       check_tc_fp_support
-       require_command lldptool
-       bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
        h1=${NETIFS[p1]}
        h2=${NETIFS[p2]}
 
@@ -278,6 +273,19 @@ cleanup()
        h1_destroy
 }
 
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+       ethtool --show-mm $netif &> /dev/null
+       if [[ $? -ne 0 ]]; then
+               echo "SKIP: $netif does not support MAC Merge"
+               exit $ksft_skip
+       fi
+done
+
 trap cleanup EXIT
 
 setup_prepare
index eb9ec4a..7594bbb 100755 (executable)
@@ -99,6 +99,8 @@ test_stats_rx()
        test_stats g2a rx
 }
 
+skip_on_veth
+
 trap cleanup EXIT
 
 setup_prepare
index 9f5b3e2..49fa94b 100755 (executable)
@@ -14,6 +14,8 @@ ALL_TESTS="
 NUM_NETIFS=4
 source lib.sh
 
+require_command $TROUTE6
+
 h1_create()
 {
        simple_if_init $h1 2001:1:1::2/64
index 9ddb68d..f69015b 100755 (executable)
@@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes}
 REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
 STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
 TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -163,6 +164,17 @@ check_port_mab_support()
        fi
 }
 
+skip_on_veth()
+{
+       local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+               jq -r '.[].linkinfo.info_kind')
+
+       if [[ $kind == veth ]]; then
+               echo "SKIP: Test cannot be run with veth pairs"
+               exit $ksft_skip
+       fi
+}
+
 if [[ "$(id -u)" -ne 0 ]]; then
        echo "SKIP: need root privileges"
        exit $ksft_skip
@@ -225,6 +237,11 @@ create_netif_veth()
        for ((i = 1; i <= NUM_NETIFS; ++i)); do
                local j=$((i+1))
 
+               if [ -z ${NETIFS[p$i]} ]; then
+                       echo "SKIP: Cannot create interface. Name not specified"
+                       exit $ksft_skip
+               fi
+
                ip link show dev ${NETIFS[p$i]} &> /dev/null
                if [[ $? -ne 0 ]]; then
                        ip link add ${NETIFS[p$i]} type veth \
index aff88f7..5ea9d63 100755 (executable)
@@ -72,7 +72,8 @@ test_span_gre_ttl()
 
        RET=0
 
-       mirror_install $swp1 ingress $tundev "matchall $tcflags"
+       mirror_install $swp1 ingress $tundev \
+               "prot ip flower $tcflags ip_prot icmp"
        tc filter add dev $h3 ingress pref 77 prot $prot \
                flower skip_hw ip_ttl 50 action pass
 
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644 (file)
index 0000000..e7b9417
--- /dev/null
@@ -0,0 +1 @@
+timeout=0
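The new per-directory "settings" file is consumed by the kselftest runner;
a timeout of 0 disables the runner's default per-test time limit, which
long-running forwarding tests can otherwise exceed. A sketch of the format
(one key=value per line; the comment is illustrative, not in the patch):

    # tools/testing/selftests/net/forwarding/settings
    timeout=0    # 0 = no per-test timeout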
index a96cff8..b0f5e55 100755 (executable)
@@ -9,6 +9,8 @@ NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
 
+require_command ncat
+
 tcflags="skip_hw"
 
 h1_create()
@@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test()
                ip_proto icmp \
                        action drop
 
-       ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2  &
+       ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
        local rpid=$!
-       ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+       ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
        wait -n $rpid
        cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
        check_err $? "server output check failed"
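The nc-to-ncat switch pins these tests to nmap's ncat: the --recv-only and
--send-only options used here are ncat extensions that classic netcat
implementations lack, and the require_command ncat check added above makes
that dependency explicit. A hedged sketch of the same pattern outside the
test harness:

    ncat --recv-only -w10 -l -p 12345 -o received.bin &   # listener, logs payload
    ncat --send-only -w1 192.0.2.2 12345 < payload.bin    # sender, transmits file
    wait; cmp -s payload.bin received.bin                 # verify byte-identical copy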
index 683711f..b1daad1 100755 (executable)
@@ -52,8 +52,8 @@ match_dst_mac_test()
        tc_check_packets "dev $h2 ingress" 101 1
        check_fail $? "Matched on a wrong filter"
 
-       tc_check_packets "dev $h2 ingress" 102 1
-       check_err $? "Did not match on correct filter"
+       tc_check_packets "dev $h2 ingress" 102 0
+       check_fail $? "Did not match on correct filter"
 
        tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
        tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -78,8 +78,8 @@ match_src_mac_test()
        tc_check_packets "dev $h2 ingress" 101 1
        check_fail $? "Matched on a wrong filter"
 
-       tc_check_packets "dev $h2 ingress" 102 1
-       check_err $? "Did not match on correct filter"
+       tc_check_packets "dev $h2 ingress" 102 0
+       check_fail $? "Did not match on correct filter"
 
        tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
        tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
index e22c2d2..20a7cb7 100755 (executable)
@@ -127,6 +127,7 @@ test_l2_miss_multicast_common()
        local proto=$1; shift
        local sip=$1; shift
        local dip=$1; shift
+       local dmac=$1; shift
        local mode=$1; shift
        local name=$1; shift
 
@@ -142,7 +143,7 @@ test_l2_miss_multicast_common()
           action pass
 
        # Before adding MDB entry.
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 1
        check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
@@ -153,7 +154,7 @@ test_l2_miss_multicast_common()
        # Adding MDB entry.
        bridge mdb replace dev br1 port $swp2 grp $dip permanent
 
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 1
        check_err $? "Unregistered multicast filter was hit after adding MDB entry"
@@ -164,7 +165,7 @@ test_l2_miss_multicast_common()
        # Deleting MDB entry.
        bridge mdb del dev br1 port $swp2 grp $dip
 
-       $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+       $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
 
        tc_check_packets "dev $swp2 egress" 101 2
        check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
@@ -183,10 +184,11 @@ test_l2_miss_multicast_ipv4()
        local proto="ipv4"
        local sip=192.0.2.1
        local dip=239.1.1.1
+       local dmac=01:00:5e:01:01:01
        local mode="-4"
        local name="IPv4"
 
-       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+       test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
 }
 
 test_l2_miss_multicast_ipv6()
@@ -194,10 +196,11 @@ test_l2_miss_multicast_ipv6()
        local proto="ipv6"
        local sip=2001:db8:1::1
        local dip=ff0e::1
+       local dmac=33:33:00:00:00:01
        local mode="-6"
        local name="IPv6"
 
-       test_l2_miss_multicast_common $proto $sip $dip $mode $name
+       test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
 }
 
 test_l2_miss_multicast()
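The new dmac arguments must track dip because multicast destination MACs
are derived from the group address: an IPv4 group maps to 01:00:5e plus
the low 23 bits of the address, an IPv6 group to 33:33 plus the low
32 bits (hence 239.1.1.1 -> 01:00:5e:01:01:01 and ff0e::1 ->
33:33:00:00:00:01). A small shell sketch of the IPv4 derivation
(illustrative only):

    ip=239.1.1.1; IFS=. read -r a b c d <<< "$ip"
    printf '01:00:5e:%02x:%02x:%02x\n' $((b & 0x7f)) "$c" "$d"   # 01:00:5e:01:01:01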
index 5ac184d..5a5dd90 100755 (executable)
@@ -104,11 +104,14 @@ tunnel_key_nofrag_test()
        local i
 
        tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
-               flower ip_flags nofrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags nofrag action drop
        tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
-               flower ip_flags firstfrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags firstfrag action drop
        tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
-               flower ip_flags nofirstfrag action drop
+               flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+               ip_flags nofirstfrag action drop
 
        # test 'nofrag' set
        tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
index e6c9d54..d01b73a 100755 (executable)
@@ -162,9 +162,7 @@ check_tools()
        elif ! iptables -V &> /dev/null; then
                echo "SKIP: Could not run all tests without iptables tool"
                exit $ksft_skip
-       fi
-
-       if ! ip6tables -V &> /dev/null; then
+       elif ! ip6tables -V &> /dev/null; then
                echo "SKIP: Could not run all tests without ip6tables tool"
                exit $ksft_skip
        fi
@@ -707,6 +705,7 @@ pm_nl_del_endpoint()
        local addr=$3
 
        if [ $ip_mptcp -eq 1 ]; then
+               [ $id -ne 0 ] && addr=''
                ip -n $ns mptcp endpoint delete id $id $addr
        else
                ip netns exec $ns ./pm_nl_ctl del $id $addr
@@ -797,10 +796,11 @@ pm_nl_check_endpoint()
        fi
 
        if [ $ip_mptcp -eq 1 ]; then
+               # get line and trim trailing whitespace
                line=$(ip -n $ns mptcp endpoint show $id)
+               line="${line% }"
                # the dump order is: address id flags port dev
-               expected_line="$addr"
-               [ -n "$addr" ] && expected_line="$expected_line $addr"
+               [ -n "$addr" ] && expected_line="$addr"
                expected_line="$expected_line $id"
                [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
                [ -n "$dev" ] && expected_line="$expected_line $dev"
index dfe3d28..f838dd3 100755 (executable)
@@ -361,6 +361,7 @@ err_buf=
 tcpdump_pids=
 nettest_pids=
 socat_pids=
+tmpoutfile=
 
 err() {
        err_buf="${err_buf}${1}
@@ -951,6 +952,7 @@ cleanup() {
        ip link del veth_A-R1                   2>/dev/null
        ovs-vsctl --if-exists del-port vxlan_a  2>/dev/null
        ovs-vsctl --if-exists del-br ovs_br0    2>/dev/null
+       rm -f "$tmpoutfile"
 }
 
 mtu() {
@@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
        check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
        pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
        check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+       tmpoutfile=$(mktemp)
+
+       # Flush exceptions, then retry with TCP
+       run_cmd ${ns_a} ip route flush cached ${dst}
+       run_cmd ${ns_b} ip route flush cached ${dst}
+       run_cmd ${ns_c} ip route flush cached ${dst}
+
+       for target in "${ns_a}" "${ns_c}" ; do
+               if [ ${family} -eq 4 ]; then
+                       TCPDST=TCP:${dst}:50000
+               else
+                       TCPDST="TCP:[${dst}]:50000"
+               fi
+               ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+
+               sleep 1
+
+               dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+               size=$(du -sb $tmpoutfile)
+               size=${size%%/tmp/*}
+
+               [ $size -ne 1048576 ] && err "File size $size does not match expected value in locally bridged vxlan test" && return 1
+       done
+
+       rm -f "$tmpoutfile"
+
+       # Check that exceptions were created
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+       check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+       pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+       check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on locally bridged ${type} interface"
 }
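Mechanics of the TCP leg above (a sketch, not part of the patch): pushing
a 1 MiB dd stream through the bridged tunnel makes the sender learn the
reduced PMTU via TCP itself rather than via an ICMP probe, after which the
route exceptions are re-checked. The size bookkeeping relies on du's
"SIZE<TAB>/tmp/..." output format:

    size=$(du -sb "$tmpoutfile")   # e.g. "1048576  /tmp/tmp.abc123"
    size=${size%%/tmp/*}           # strip the path, keeping the byte count
    [ "$size" -eq 1048576 ] && echo "full 1 MiB received"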
 
 test_pmtu_ipv4_br_vxlan4_exception() {
index 0e04f9f..a148181 100644 (file)
@@ -159,7 +159,7 @@ void create_clients(struct __test_metadata *_metadata,
                /* Make sure SYN will be processed on the i-th CPU
                 * and finally distributed to the i-th listener.
                 */
-               sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+               ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
                ASSERT_EQ(ret, 0);
 
                for (j = 0; j < CLIENT_PER_SERVER; j++) {
index 4863349..970df9e 100644 (file)
@@ -577,7 +577,6 @@ int run_syscall(int min, int max)
                CASE_TEST(chdir_root);        EXPECT_SYSZR(1, chdir("/")); break;
                CASE_TEST(chdir_dot);         EXPECT_SYSZR(1, chdir(".")); break;
                CASE_TEST(chdir_blah);        EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
-               CASE_TEST(chmod_net);         EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break;
                CASE_TEST(chmod_self);        EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break;
                CASE_TEST(chown_self);        EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;
                CASE_TEST(chroot_root);       EXPECT_SYSZR(euid0, chroot("/")); break;
index b92dfeb..99162d1 100755 (executable)
@@ -3,6 +3,8 @@
 #
 # Usage: configcheck.sh .config .config-template
 #
+# Non-empty output if errors detected.
+#
 # Copyright (C) IBM Corporation, 2011
 #
 # Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
 trap 'rm -rf $T' 0
 
-sed -e 's/"//g' < $1 > $T/.config
+# function test_kconfig_enabled ( Kconfig-var=val )
+function test_kconfig_enabled () {
+       if ! grep -q "^$1$" $T/.config
+       then
+               echo :$1: improperly set
+               return 1
+       fi
+       return 0
+}
 
-sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 |
-awk    '
-{
-               print "if grep -q \"" $0 "\" < '"$T/.config"'";
-               print "then";
-               print "\t:";
-               print "else";
-               if ($1 == "#") {
-                       print "\tif grep -q \"" $2 "\" < '"$T/.config"'";
-                       print "\tthen";
-                       print "\t\tif test \"$firsttime\" = \"\""
-                       print "\t\tthen"
-                       print "\t\t\tfirsttime=1"
-                       print "\t\tfi"
-                       print "\t\techo \":" $2 ": improperly set\"";
-                       print "\telse";
-                       print "\t\t:";
-                       print "\tfi";
-               } else {
-                       print "\tif test \"$firsttime\" = \"\""
-                       print "\tthen"
-                       print "\t\tfirsttime=1"
-                       print "\tfi"
-                       print "\techo \":" $0 ": improperly set\"";
-               }
-               print "fi";
-       }' | sh
+# function test_kconfig_disabled ( Kconfig-var )
+function test_kconfig_disabled () {
+       if grep -q "^$1=n$" $T/.config
+       then
+               return 0
+       fi
+       if grep -q "^$1=" $T/.config
+       then
+               echo :$1=n: improperly set
+               return 1
+       fi
+       return 0
+}
+
+sed -e 's/"//g' < $1 > $T/.config
+sed -e 's/^#CHECK#//' < $2 > $T/ConfigFragment
+grep '^CONFIG_.*=n$' $T/ConfigFragment |
+       sed -e 's/^/test_kconfig_disabled /' -e 's/=n$//' > $T/kconfig-n.sh
+. $T/kconfig-n.sh
+grep -v '^CONFIG_.*=n$' $T/ConfigFragment | grep '^CONFIG_' |
+       sed -e 's/^/test_kconfig_enabled /' > $T/kconfig-not-n.sh
+. $T/kconfig-not-n.sh
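As a worked example of the rewritten flow (derived from the sed commands
above): a ConfigFragment containing

    CONFIG_RCU_EXPERT=y
    CONFIG_RCU_BOOST=n

is split into two generated scripts, kconfig-not-n.sh holding
"test_kconfig_enabled CONFIG_RCU_EXPERT=y" and kconfig-n.sh holding
"test_kconfig_disabled CONFIG_RCU_BOOST", each of which emits an
":OPTION: improperly set" diagnostic when the final .config disagrees.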
index 48b9147..b8e2ea2 100644 (file)
@@ -45,7 +45,7 @@ checkarg () {
 configfrag_boot_params () {
        if test -r "$2.boot"
        then
-               echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '`
+               echo `grep -v '^#' "$2.boot" | tr '\012' ' '` $1
        else
                echo $1
        fi
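Putting "$1" after the .boot-file contents reverses the precedence: when
the same kernel or module parameter appears twice on the command line, the
later instance wins, so caller-supplied arguments now override .boot-file
defaults rather than the other way around. An illustration with
hypothetical values:

    # TREE01.boot contains: rcutorture.onoff_interval=200
    # old output: "rcutorture.onoff_interval=1000 ... rcutorture.onoff_interval=200"
    # new output: "rcutorture.onoff_interval=200 ... rcutorture.onoff_interval=1000"
    # the kernel applies the last value, 1000, as the caller requested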
index b582113..f683e42 100755 (executable)
@@ -40,6 +40,10 @@ awk '
        sum += $5 / 1000.;
 }
 
+/rcu_scale: Grace-period kthread CPU time/ {
+       cputime = $6;
+}
+
 END {
        newNR = asort(gptimes);
        if (newNR <= 0) {
@@ -78,6 +82,8 @@ END {
        print "90th percentile grace-period duration: " gptimes[pct90];
        print "99th percentile grace-period duration: " gptimes[pct99];
        print "Maximum grace-period duration: " gptimes[newNR];
-       print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+       if (cputime != "")
+               cpustr = " CPU: " cputime;
+       print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches cpustr;
        print "Computed from rcuscale printk output.";
 }'
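With the extra match in place, the closing summary line grows an optional
CPU field whenever the console log carried an "rcu_scale: Grace-period
kthread CPU time" message, e.g. (numbers invented):

    Grace periods: 1200 Batches: 400 Ratio: 3 CPU: 42.5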
index 1df7e69..5be670d 100755 (executable)
@@ -16,6 +16,8 @@
 T=/tmp/kvm-recheck.sh.$$
 trap 'rm -f $T' 0 2
 
+configerrors=0
+
 PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
 . functions.sh
 for rd in "$@"
@@ -32,7 +34,7 @@ do
                fi
                TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE
                configfile=`echo $i | sed -e 's,^.*/,,'`
-               rm -f $i/console.log.*.diags
+               rm -f $i/console.log.*.diags $i/ConfigFragment.diags
                case "${TORTURE_SUITE}" in
                X*)
                        ;;
@@ -49,8 +51,21 @@ do
                        then
                                echo QEMU killed
                        fi
-                       configcheck.sh $i/.config $i/ConfigFragment > $T 2>&1
-                       cat $T
+                       configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+                       if grep -q '^CONFIG_KCSAN=y$' $i/ConfigFragment.input
+                       then
+                               # KCSAN forces a number of Kconfig options, so remove
+                               # complaints about those Kconfig options in KCSAN runs.
+                               mv $i/ConfigFragment.diags $i/ConfigFragment.diags.kcsan
+                               grep -v -E 'CONFIG_PROVE_RCU|CONFIG_PREEMPT_COUNT' $i/ConfigFragment.diags.kcsan > $i/ConfigFragment.diags
+                       fi
+                       if test -s $i/ConfigFragment.diags
+                       then
+                               cat $i/ConfigFragment.diags
+                               configerrors=$((configerrors+1))
+                       else
+                               rm $i/ConfigFragment.diags
+                       fi
                        if test -r $i/Make.oldconfig.err
                        then
                                cat $i/Make.oldconfig.err
@@ -65,7 +80,14 @@ do
                        if test -f "$i/buildonly"
                        then
                                echo Build-only run, no boot/test
-                               configcheck.sh $i/.config $i/ConfigFragment
+                               configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+                               if test -s $i/ConfigFragment.diags
+                               then
+                                       cat $i/ConfigFragment.diags
+                                       configerrors=$((configerrors+1))
+                               else
+                                       rm $i/ConfigFragment.diags
+                               fi
                                parse-build.sh $i/Make.out $configfile
                        elif test -f "$i/qemu-cmd"
                        then
@@ -79,10 +101,10 @@ do
        done
        if test -f "$rd/kcsan.sum"
        then
-               if ! test -f $T
+               if ! test -f $i/ConfigFragment.diags
                then
                        :
-               elif grep -q CONFIG_KCSAN=y $T
+               elif grep -q CONFIG_KCSAN=y $i/ConfigFragment.diags
                then
                        echo "Compiler or architecture does not support KCSAN!"
                        echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
@@ -94,17 +116,23 @@ do
                fi
        fi
 done
+
+if test "$configerrors" -gt 0
+then
+       echo $configerrors runs with .config errors.
+       ret=1
+fi
 EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
 builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`"
 if test "$builderrors" -gt 0
 then
        echo $builderrors runs with build errors.
-       ret=1
+       ret=2
 fi
 runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`"
 if test "$runerrors" -gt 0
 then
        echo $runerrors runs with runtime errors.
-       ret=2
+       ret=3
 fi
 exit $ret
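The resulting exit-code convention (later categories overwrite earlier
ones, so the highest-severity error class present determines the code):

    # 0  no errors detected
    # 1  .config errors only
    # 2  build errors (with or without .config errors)
    # 3  runtime errors (with or without the above)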
index a232816..134cdef 100755 (executable)
@@ -137,14 +137,20 @@ chmod +x $T/bin/kvm-remote-*.sh
 # Check first to avoid the need for cleanup for system-name typos
 for i in $systems
 do
-       ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
+       ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr
        ret=$?
        if test "$ret" -ne 0
        then
-               echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
+               echo "System $i unreachable ($ret), giving up." | tee -a "$oldrun/remote-log"
+               echo ' --- ssh stdout: vvv' | tee -a "$oldrun/remote-log"
+               cat $T/ssh.stdout | tee -a "$oldrun/remote-log"
+               echo ' --- ssh stdout: ^^^' | tee -a "$oldrun/remote-log"
+               echo ' --- ssh stderr: vvv' | tee -a "$oldrun/remote-log"
+               cat $T/ssh.stderr | tee -a "$oldrun/remote-log"
+               echo ' --- ssh stderr: ^^^' | tee -a "$oldrun/remote-log"
                exit 4
        fi
-       echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
+       echo $i: `cat $T/ssh.stdout` CPUs " " `date` | tee -a "$oldrun/remote-log"
 done
 
 # Download and expand the tarball on all systems.
index d2a3710..b33cd87 100755 (executable)
@@ -9,9 +9,10 @@
 #
 # Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in
 #
-# qemu-args defaults to "-enable-kvm -nographic", along with arguments
-#                      specifying the number of CPUs and other options
-#                      generated from the underlying CPU architecture.
+# qemu-args defaults to "-enable-kvm -display none -no-reboot", along
+#                      with arguments specifying the number of CPUs
+#                      and other options generated from the underlying
+#                      CPU architecture.
 # boot_args_in defaults to value returned by the per_version_boot_params
 #                      shell function.
 #
@@ -57,7 +58,6 @@ config_override_param () {
                cat $T/Kconfig_args >> $resdir/ConfigFragment.input
                config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
                mv $T/$2.tmp $T/$2
-               # Note that "#CHECK#" is not permitted on commandline.
        fi
 }
 
@@ -140,7 +140,7 @@ then
 fi
 
 # Generate -smp qemu argument.
-qemu_args="-enable-kvm -nographic $qemu_args"
+qemu_args="-enable-kvm -display none -no-reboot $qemu_args"
 cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
 cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"`
 if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"
@@ -163,7 +163,7 @@ boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`"
 boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
 if test -n "$TORTURE_BOOT_GDB_ARG"
 then
-       boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
+       boot_args="$TORTURE_BOOT_GDB_ARG $boot_args"
 fi
 
 # Give bare-metal advice
index d3cdc2d..b0f36a6 100755 (executable)
@@ -186,7 +186,7 @@ do
                fi
                ;;
        --kconfig|--kconfigs)
-               checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
+               checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
                TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
                shift
                ;;
index 71f0dfb..212c52c 100755 (executable)
@@ -10,7 +10,6 @@
 D=tools/testing/selftests/rcutorture
 
 # Prerequisite checks
-[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1
 if [ ! -d "$D" ]; then
     echo >&2 "$D does not exist: Malformed kernel source tree?"
     exit 1
@@ -34,12 +33,16 @@ cat > init.c << '___EOF___'
 
 volatile unsigned long delaycount;
 
-int main(int argc, int argv[])
+int main(int argc, char *argv[])
 {
        int i;
        struct timeval tv;
        struct timeval tvb;
 
+       printf("Torture-test rudimentary init program started, command line:\n");
+       for (i = 0; i < argc; i++)
+               printf(" %s", argv[i]);
+       printf("\n");
        for (;;) {
                sleep(1);
                /* Need some userspace time. */
@@ -64,15 +67,23 @@ ___EOF___
 # build using nolibc on supported archs (smaller executable) and fall
 # back to regular glibc on other ones.
 if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \
-           "||__ARM_EABI__||__aarch64__||__s390x__\nyes\n#endif" \
+          "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \
    | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \
    | grep -q '^yes'; then
        # architecture supported by nolibc
         ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \
                -nostdlib -include ../../../../include/nolibc/nolibc.h \
                -s -static -Os -o init init.c -lgcc
+       ret=$?
 else
        ${CROSS_COMPILE}gcc -s -static -Os -o init init.c
+       ret=$?
+fi
+
+if [ "$ret" -ne 0 ]
+then
+       echo "Failed to create a statically linked C-language initrd"
+       exit "$ret"
 fi
 
 rm init.c
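The nolibc architecture probe can be exercised standalone; a hedged sketch
with the arch list abbreviated from the script above:

    echo -e '#if __x86_64__||__aarch64__||__loongarch__\nyes\n#endif' \
        | gcc -E -nostdlib -xc - | grep -q '^yes' \
        && echo "nolibc-capable toolchain"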
index 5a2ae22..12b50a4 100755 (executable)
@@ -55,6 +55,8 @@ do_kasan=yes
 do_kcsan=no
 do_clocksourcewd=yes
 do_rt=yes
+do_rcutasksflavors=yes
+do_srcu_lockdep=yes
 
 # doyesno - Helper function for yes/no arguments
 function doyesno () {
@@ -73,18 +75,20 @@ usage () {
        echo "       --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
        echo "       --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
        echo "       --do-all"
-       echo "       --do-allmodconfig / --do-no-allmodconfig"
-       echo "       --do-clocksourcewd / --do-no-clocksourcewd"
-       echo "       --do-kasan / --do-no-kasan"
-       echo "       --do-kcsan / --do-no-kcsan"
-       echo "       --do-kvfree / --do-no-kvfree"
-       echo "       --do-locktorture / --do-no-locktorture"
+       echo "       --do-allmodconfig / --do-no-allmodconfig / --no-allmodconfig"
+       echo "       --do-clocksourcewd / --do-no-clocksourcewd / --no-clocksourcewd"
+       echo "       --do-kasan / --do-no-kasan / --no-kasan"
+       echo "       --do-kcsan / --do-no-kcsan / --no-kcsan"
+       echo "       --do-kvfree / --do-no-kvfree / --no-kvfree"
+       echo "       --do-locktorture / --do-no-locktorture / --no-locktorture"
        echo "       --do-none"
-       echo "       --do-rcuscale / --do-no-rcuscale"
-       echo "       --do-rcutorture / --do-no-rcutorture"
-       echo "       --do-refscale / --do-no-refscale"
-       echo "       --do-rt / --do-no-rt"
-       echo "       --do-scftorture / --do-no-scftorture"
+       echo "       --do-rcuscale / --do-no-rcuscale / --no-rcuscale"
+       echo "       --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors"
+       echo "       --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
+       echo "       --do-refscale / --do-no-refscale / --no-refscale"
+       echo "       --do-rt / --do-no-rt / --no-rt"
+       echo "       --do-scftorture / --do-no-scftorture / --no-scftorture"
+       echo "       --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep"
        echo "       --duration [ <minutes> | <hours>h | <days>d ]"
        echo "       --kcsan-kmake-arg kernel-make-arguments"
        exit 1
@@ -115,6 +119,7 @@ do
                ;;
        --do-all|--doall)
                do_allmodconfig=yes
+               do_rcutasksflavors=yes
                do_rcutorture=yes
                do_locktorture=yes
                do_scftorture=yes
@@ -125,27 +130,29 @@ do
                do_kasan=yes
                do_kcsan=yes
                do_clocksourcewd=yes
+               do_srcu_lockdep=yes
                ;;
-       --do-allmodconfig|--do-no-allmodconfig)
+       --do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig)
                do_allmodconfig=`doyesno "$1" --do-allmodconfig`
                ;;
-       --do-clocksourcewd|--do-no-clocksourcewd)
+       --do-clocksourcewd|--do-no-clocksourcewd|--no-clocksourcewd)
                do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
                ;;
-       --do-kasan|--do-no-kasan)
+       --do-kasan|--do-no-kasan|--no-kasan)
                do_kasan=`doyesno "$1" --do-kasan`
                ;;
-       --do-kcsan|--do-no-kcsan)
+       --do-kcsan|--do-no-kcsan|--no-kcsan)
                do_kcsan=`doyesno "$1" --do-kcsan`
                ;;
-       --do-kvfree|--do-no-kvfree)
+       --do-kvfree|--do-no-kvfree|--no-kvfree)
                do_kvfree=`doyesno "$1" --do-kvfree`
                ;;
-       --do-locktorture|--do-no-locktorture)
+       --do-locktorture|--do-no-locktorture|--no-locktorture)
                do_locktorture=`doyesno "$1" --do-locktorture`
                ;;
        --do-none|--donone)
                do_allmodconfig=no
+               do_rcutasksflavors=no
                do_rcutorture=no
                do_locktorture=no
                do_scftorture=no
@@ -156,22 +163,29 @@ do
                do_kasan=no
                do_kcsan=no
                do_clocksourcewd=no
+               do_srcu_lockdep=no
                ;;
-       --do-rcuscale|--do-no-rcuscale)
+       --do-rcuscale|--do-no-rcuscale|--no-rcuscale)
                do_rcuscale=`doyesno "$1" --do-rcuscale`
                ;;
-       --do-rcutorture|--do-no-rcutorture)
+       --do-rcutasksflavors|--do-no-rcutasksflavors|--no-rcutasksflavors)
+               do_rcutasksflavors=`doyesno "$1" --do-rcutasksflavors`
+               ;;
+       --do-rcutorture|--do-no-rcutorture|--no-rcutorture)
                do_rcutorture=`doyesno "$1" --do-rcutorture`
                ;;
-       --do-refscale|--do-no-refscale)
+       --do-refscale|--do-no-refscale|--no-refscale)
                do_refscale=`doyesno "$1" --do-refscale`
                ;;
-       --do-rt|--do-no-rt)
+       --do-rt|--do-no-rt|--no-rt)
                do_rt=`doyesno "$1" --do-rt`
                ;;
-       --do-scftorture|--do-no-scftorture)
+       --do-scftorture|--do-no-scftorture|--no-scftorture)
                do_scftorture=`doyesno "$1" --do-scftorture`
                ;;
+       --do-srcu-lockdep|--do-no-srcu-lockdep|--no-srcu-lockdep)
+               do_srcu_lockdep=`doyesno "$1" --do-srcu-lockdep`
+               ;;
        --duration)
                checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(m\|h\|d\|\)$' '^error'
                mult=1
@@ -361,6 +375,40 @@ then
        fi
 fi
 
+# Test building RCU Tasks flavors in isolation, both SMP and !SMP
+if test "$do_rcutasksflavors" = "yes"
+then
+       echo " --- rcutasksflavors:" Start `date` | tee -a $T/log
+       rtfdir="tools/testing/selftests/rcutorture/res/$ds/results-rcutasksflavors"
+       mkdir -p "$rtfdir"
+       cat > $T/rcutasksflavors << __EOF__
+#CHECK#CONFIG_TASKS_RCU=n
+#CHECK#CONFIG_TASKS_RUDE_RCU=n
+#CHECK#CONFIG_TASKS_TRACE_RCU=n
+__EOF__
+       for flavor in CONFIG_TASKS_RCU CONFIG_TASKS_RUDE_RCU CONFIG_TASKS_TRACE_RCU
+       do
+               forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
+               deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
+               echo " --- Running RCU Tasks flavor $flavor `date`" >> $rtfdir/log
+               tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
+               retcode=$?
+               if test "$retcode" -ne 0
+               then
+                       break
+               fi
+       done
+       if test "$retcode" -eq 0
+       then
+               echo "rcutasksflavors($retcode)" $rtfdir >> $T/successes
+               echo Success >> $rtfdir/log
+       else
+               echo "rcutasksflavors($retcode)" $rtfdir >> $T/failures
+               echo " --- rcutasksflavors Test summary:" >> $rtfdir/log
+               echo " --- Summary: Exit code $retcode from $flavor, see Make.out" >> $rtfdir/log
+       fi
+fi
+
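+# For illustration (derived from the loop above, not itself in the patch):
+# when flavor=CONFIG_TASKS_RCU, the kvm.sh invocation effectively becomes
+#   kvm.sh --buildonly --configs "TINY01 TREE04" \
+#          --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y \
+#                     CONFIG_FORCE_TASKS_RCU=y \
+#                     #CHECK#CONFIG_TASKS_RUDE_RCU=n \
+#                     #CHECK#CONFIG_TASKS_TRACE_RCU=n" \
+#          --trust-make
+# forcing exactly one Tasks-RCU flavor on while configcheck.sh verifies
+# that the other two stay off; the #CHECK# form is what the relaxed
+# --kconfig checkarg regex in kvm.sh (earlier in this series) now permits.
+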
 # --torture rcu
 if test "$do_rcutorture" = "yes"
 then
@@ -376,8 +424,10 @@ fi
 
 if test "$do_scftorture" = "yes"
 then
+       # Scale memory based on the number of CPUs.
+       scfmem=$((2+HALF_ALLOTED_CPUS/16))
        torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
-       torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+       torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
 fi
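(Worked example of the new scaling: with 32 allotted CPUs,
HALF_ALLOTED_CPUS is 16, so scfmem = 2 + 16/16 = 3 and kvm.sh receives
--memory 3G; smaller runs keep the previous 2G.)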
 
 if test "$do_rt" = "yes"
@@ -391,6 +441,23 @@ then
        torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
 fi
 
+if test "$do_srcu_lockdep" = "yes"
+then
+       echo " --- do-srcu-lockdep:" Start `date` | tee -a $T/log
+       tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh --datestamp "$ds/results-srcu-lockdep" > $T/srcu_lockdep.sh.out 2>&1
+       retcode=$?
+       cp $T/srcu_lockdep.sh.out "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+       if test "$retcode" -eq 0
+       then
+               echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/successes
+               echo Success >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+       else
+               echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/failures
+               echo " --- srcu_lockdep Test Summary:" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+               echo " --- Summary: Exit code $retcode from srcu_lockdep.sh, see $ds/results-srcu-lockdep" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+       fi
+fi
+
 if test "$do_refscale" = yes
 then
        primlist="`grep '\.name[        ]*=' kernel/rcu/refscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`"
@@ -541,11 +608,23 @@ then
 fi
 echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
 echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
+find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors
+find "$tdir" -name 'Make.out.diags' -print > $T/builderrors
+if test -s "$T/configerrors"
+then
+       echo "  Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`"
+       nonkcsanbug="yes"
+fi
+if test -s "$T/builderrors"
+then
+       echo "  Scenarios with build errors: `wc -l "$T/builderrors" | awk '{ print $1 }'`"
+       nonkcsanbug="yes"
+fi
 if test -z "$nonkcsanbug" && test -s "$T/failuresum"
 then
        echo "  All bugs were KCSAN failures."
 fi
-tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
 if test -n "$tdir" && test $compress_concurrency -gt 0
 then
        # KASAN vmlinux files can approach 1GB in size, so compress them.
index d3e4b29..e7bb327 100644 (file)
@@ -22,8 +22,9 @@ locktorture_param_onoff () {
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-       echo $1 `locktorture_param_onoff "$1" "$2"` \
+       echo    `locktorture_param_onoff "$1" "$2"` \
                locktorture.stat_interval=15 \
                locktorture.shutdown_secs=$3 \
-               locktorture.verbose=1
+               locktorture.verbose=1 \
+               $1
 }
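The same "$1"-last reordering is applied to the rcutorture, rcuscale,
refscale, and scftorture ver_functions.sh files below, for the same reason
as the boot-parameter change above: with repeated module parameters the
kernel honors the last one, so emitting the script defaults first lets
caller-supplied values win. A hypothetical illustration:

    per_version_boot_params "locktorture.stat_interval=60" TREE01 600
    # old tail: ... locktorture.stat_interval=60 ... locktorture.stat_interval=15
    # new tail: ... locktorture.stat_interval=15 ... locktorture.stat_interval=60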
index dea26c5..2ef2fb6 100644 (file)
@@ -6,6 +6,5 @@ CONFIG_PREEMPT=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
-#CHECK#CONFIG_RCU_EXPERT=n
 CONFIG_TASKS_RCU=y
 CONFIG_RCU_EXPERT=y
index 04831ef..8ae41d5 100644 (file)
@@ -15,4 +15,3 @@ CONFIG_DEBUG_LOCK_ALLOC=n
 CONFIG_RCU_BOOST=n
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
-CONFIG_BOOTPARAM_HOTPLUG_CPU0=y
index e2bc99c..c044df3 100644 (file)
@@ -46,10 +46,11 @@ rcutorture_param_stat_interval () {
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-       echo $1 `rcutorture_param_onoff "$1" "$2"` \
+       echo    `rcutorture_param_onoff "$1" "$2"` \
                `rcutorture_param_n_barrier_cbs "$1"` \
                `rcutorture_param_stat_interval "$1"` \
                rcutorture.shutdown_secs=$3 \
                rcutorture.test_no_idle_hz=1 \
-               rcutorture.verbose=1
+               rcutorture.verbose=1 \
+               $1
 }
index 6a00157..b1ffd7c 100644 (file)
@@ -2,5 +2,7 @@ CONFIG_RCU_SCALE_TEST=y
 CONFIG_PRINTK_TIME=y
 CONFIG_FORCE_TASKS_RCU=y
 #CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
 CONFIG_FORCE_TASKS_TRACE_RCU=y
 #CHECK#CONFIG_TASKS_TRACE_RCU=y
index 227aba7..0059592 100644 (file)
@@ -2,6 +2,8 @@ CONFIG_SMP=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TREE_RCU=y
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
 CONFIG_NO_HZ_FULL=n
index ffbe151..28070b4 100644 (file)
@@ -11,6 +11,7 @@
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-       echo $1 rcuscale.shutdown=1 \
-               rcuscale.verbose=0
+       echo    rcuscale.shutdown=1 \
+               rcuscale.verbose=0 \
+               $1
 }
index ef2b501..67f9d29 100644 (file)
@@ -2,6 +2,7 @@ CONFIG_SMP=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
 #CHECK#CONFIG_PREEMPT_RCU=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=y
index f81fa2c..7484656 100644 (file)
@@ -11,6 +11,7 @@
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-       echo $1 refscale.shutdown=1 \
-               refscale.verbose=0
+       echo    refscale.shutdown=1 \
+               refscale.verbose=0 \
+               $1
 }
index 3a59346..6133f54 100644 (file)
@@ -2,6 +2,8 @@ CONFIG_SMP=y
 CONFIG_PREEMPT_NONE=y
 CONFIG_PREEMPT_VOLUNTARY=n
 CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_PREEMPT_RCU=n
 CONFIG_HZ_PERIODIC=n
 CONFIG_NO_HZ_IDLE=n
 CONFIG_NO_HZ_FULL=y
index 2d949e5..7637f68 100644 (file)
@@ -22,8 +22,9 @@ scftorture_param_onoff () {
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-       echo $1 `scftorture_param_onoff "$1" "$2"` \
+       echo    `scftorture_param_onoff "$1" "$2"` \
                scftorture.stat_interval=15 \
                scftorture.shutdown_secs=$3 \
-               scftorture.verbose=1
+               scftorture.verbose=1 \
+               $1
 }
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
deleted file mode 100644 (file)
index 4bed0b6..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-all: srcu.c store_buffering
-
-LINUX_SOURCE = ../../../../../..
-
-modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
-                     $(LINUX_SOURCE)/kernel/rcu/srcu.c
-
-modified_srcu_output = include/linux/srcu.h srcu.c
-
-include/linux/srcu.h: srcu.c
-
-srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
-       awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
-
-store_buffering:
-       @cd tests/store_buffering; make
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
deleted file mode 100644 (file)
index f2860dd..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
deleted file mode 100644 (file)
index 8bc960e..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This header has been modifies to remove definitions of types that
- * are defined in standard userspace headers or are problematic for some
- * other reason.
- */
-
-#ifndef _LINUX_TYPES_H
-#define _LINUX_TYPES_H
-
-#define __EXPORTED_HEADERS__
-#include <uapi/linux/types.h>
-
-#ifndef __ASSEMBLY__
-
-#define DECLARE_BITMAP(name, bits) \
-       unsigned long name[BITS_TO_LONGS(bits)]
-
-typedef __u32 __kernel_dev_t;
-
-/* bsd */
-typedef unsigned char          u_char;
-typedef unsigned short         u_short;
-typedef unsigned int           u_int;
-typedef unsigned long          u_long;
-
-/* sysv */
-typedef unsigned char          unchar;
-typedef unsigned short         ushort;
-typedef unsigned int           uint;
-typedef unsigned long          ulong;
-
-#ifndef __BIT_TYPES_DEFINED__
-#define __BIT_TYPES_DEFINED__
-
-typedef                __u8            u_int8_t;
-typedef                __s8            int8_t;
-typedef                __u16           u_int16_t;
-typedef                __s16           int16_t;
-typedef                __u32           u_int32_t;
-typedef                __s32           int32_t;
-
-#endif /* !(__BIT_TYPES_DEFINED__) */
-
-typedef                __u8            uint8_t;
-typedef                __u16           uint16_t;
-typedef                __u32           uint32_t;
-
-/* this is a special 64bit data type that is 8-byte aligned */
-#define aligned_u64 __u64 __attribute__((aligned(8)))
-#define aligned_be64 __be64 __attribute__((aligned(8)))
-#define aligned_le64 __le64 __attribute__((aligned(8)))
-
-/**
- * The type used for indexing onto a disc or disc partition.
- *
- * Linux always considers sectors to be 512 bytes long independently
- * of the devices real block size.
- *
- * blkcnt_t is the type of the inode's block count.
- */
-typedef u64 sector_t;
-
-/*
- * The type of an index into the pagecache.
- */
-#define pgoff_t unsigned long
-
-/*
- * A dma_addr_t can hold any valid DMA address, i.e., any address returned
- * by the DMA API.
- *
- * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
- * bits wide.  Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
- * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
- * so they don't care about the size of the actual bus addresses.
- */
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-typedef u64 dma_addr_t;
-#else
-typedef u32 dma_addr_t;
-#endif
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-typedef u64 phys_addr_t;
-#else
-typedef u32 phys_addr_t;
-#endif
-
-typedef phys_addr_t resource_size_t;
-
-/*
- * This type is the placeholder for a hardware interrupt number. It has to be
- * big enough to enclose whatever representation is used by a given platform.
- */
-typedef unsigned long irq_hw_number_t;
-
-typedef struct {
-       int counter;
-} atomic_t;
-
-#ifdef CONFIG_64BIT
-typedef struct {
-       long counter;
-} atomic64_t;
-#endif
-
-struct list_head {
-       struct list_head *next, *prev;
-};
-
-struct hlist_head {
-       struct hlist_node *first;
-};
-
-struct hlist_node {
-       struct hlist_node *next, **pprev;
-};
-
-/**
- * struct callback_head - callback structure for use with RCU and task_work
- * @next: next update requests in a list
- * @func: actual update function to call after the grace period.
- *
- * The struct is aligned to size of pointer. On most architectures it happens
- * naturally due ABI requirements, but some architectures (like CRIS) have
- * weird ABI and we need to ask it explicitly.
- *
- * The alignment is required to guarantee that bits 0 and 1 of @next will be
- * clear under normal conditions -- as long as we use call_rcu() or
- * call_srcu() to queue callback.
- *
- * This guarantee is important for few reasons:
- *  - future call_rcu_lazy() will make use of lower bits in the pointer;
- *  - the structure shares storage spacer in struct page with @compound_head,
- *    which encode PageTail() in bit 0. The guarantee is needed to avoid
- *    false-positive PageTail().
- */
-struct callback_head {
-       struct callback_head *next;
-       void (*func)(struct callback_head *head);
-} __attribute__((aligned(sizeof(void *))));
-#define rcu_head callback_head
-
-typedef void (*rcu_callback_t)(struct rcu_head *head);
-typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
-
-/* clocksource cycle base type */
-typedef u64 cycle_t;
-
-#endif /*  __ASSEMBLY__ */
-#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
deleted file mode 100755 (executable)
index e05182d..0000000
+++ /dev/null
@@ -1,376 +0,0 @@
-#!/usr/bin/awk -f
-# SPDX-License-Identifier: GPL-2.0
-
-# Modify SRCU for formal verification. The first argument should be srcu.h and
-# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
-# current directory.
-
-BEGIN {
-       if (ARGC != 5) {
-               print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
-               exit 1;
-       }
-       h_output = ARGV[3];
-       c_output = ARGV[4];
-       ARGC = 3;
-
-       # Tokenize using FS and not RS as FS supports regular expressions. Each
-       # record is one line of source, except that backslashed lines are
-       # combined. Comments are treated as field separators, as are quotes.
-       quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
-       comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
-       FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
-
-       inside_srcu_struct = 0;
-       inside_srcu_init_def = 0;
-       srcu_init_param_name = "";
-       in_macro = 0;
-       brace_nesting = 0;
-       paren_nesting = 0;
-
-       # Allow the manipulation of the last field separator after has been
-       # seen.
-       last_fs = "";
-       # Whether the last field separator was intended to be output.
-       last_fs_print = 0;
-
-       # rcu_batches stores the initialization for each instance of struct
-       # rcu_batch
-
-       in_comment = 0;
-
-       outputfile = "";
-}
-
-{
-       prev_outputfile = outputfile;
-       if (FILENAME ~ /\.h$/) {
-               outputfile = h_output;
-               if (FNR != NR) {
-                       print "Incorrect file order" > "/dev/stderr";
-                       exit 1;
-               }
-       }
-       else
-               outputfile = c_output;
-
-       if (prev_outputfile && outputfile != prev_outputfile) {
-               new_outputfile = outputfile;
-               outputfile = prev_outputfile;
-               update_fieldsep("", 0);
-               outputfile = new_outputfile;
-       }
-}
-
-# Combine the next line into $0.
-function combine_line() {
-       ret = getline next_line;
-       if (ret == 0) {
-               # Don't allow two consecutive getlines at the end of the file
-               if (eof_found) {
-                       print "Error: expected more input." > "/dev/stderr";
-                       exit 1;
-               } else {
-                       eof_found = 1;
-               }
-       } else if (ret == -1) {
-               print "Error reading next line of file" FILENAME > "/dev/stderr";
-               exit 1;
-       }
-       $0 = $0 "\n" next_line;
-}
-
-# Combine backslashed lines and multiline comments.
-function combine_backslashes() {
-       while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
-               combine_line();
-       }
-}
-
-function read_line() {
-       combine_line();
-       combine_backslashes();
-}
-
-# Print out field separators and update variables that depend on them. Only
-# print if p is true. Call with sep="" and p=0 to print out the last field
-# separator.
-function update_fieldsep(sep, p) {
-       # Count braces
-       sep_tmp = sep;
-       gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
-       while (1)
-       {
-               if (sub("[^{}()]*\\{", "", sep_tmp)) {
-                       brace_nesting++;
-                       continue;
-               }
-               if (sub("[^{}()]*\\}", "", sep_tmp)) {
-                       brace_nesting--;
-                       if (brace_nesting < 0) {
-                               print "Unbalanced braces!" > "/dev/stderr";
-                               exit 1;
-                       }
-                       continue;
-               }
-               if (sub("[^{}()]*\\(", "", sep_tmp)) {
-                       paren_nesting++;
-                       continue;
-               }
-               if (sub("[^{}()]*\\)", "", sep_tmp)) {
-                       paren_nesting--;
-                       if (paren_nesting < 0) {
-                               print "Unbalanced parenthesis!" > "/dev/stderr";
-                               exit 1;
-                       }
-                       continue;
-               }
-
-               break;
-       }
-
-       if (last_fs_print)
-               printf("%s", last_fs) > outputfile;
-       last_fs = sep;
-       last_fs_print = p;
-}
-
-# Shifts the fields down by n positions. Calls next if there are no more. If p
-# is true then print out field separators.
-function shift_fields(n, p) {
-       do {
-               if (match($0, FS) > 0) {
-                       update_fieldsep(substr($0, RSTART, RLENGTH), p);
-                       if (RSTART + RLENGTH <= length())
-                               $0 = substr($0, RSTART + RLENGTH);
-                       else
-                               $0 = "";
-               } else {
-                       update_fieldsep("", 0);
-                       print "" > outputfile;
-                       next;
-               }
-       } while (--n > 0);
-}
-
-# Shifts and prints the first n fields.
-function print_fields(n) {
-       do {
-               update_fieldsep("", 0);
-               printf("%s", $1) > outputfile;
-               shift_fields(1, 1);
-       } while (--n > 0);
-}
-
-{
-       combine_backslashes();
-}
-
-# Print leading FS
-{
-       if (match($0, "^(" FS ")+") > 0) {
-               update_fieldsep(substr($0, RSTART, RLENGTH), 1);
-               if (RSTART + RLENGTH <= length())
-                       $0 = substr($0, RSTART + RLENGTH);
-               else
-                       $0 = "";
-       }
-}
-
-# Parse the line.
-{
-       while (NF > 0) {
-               if ($1 == "struct" && NF < 3) {
-                       read_line();
-                       continue;
-               }
-
-               if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
-                   brace_nesting == 0 && paren_nesting == 0 &&
-                   $1 == "struct" && $2 == "srcu_struct" &&
-                   $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
-                       inside_srcu_struct = 1;
-                       print_fields(2);
-                       continue;
-               }
-               if (inside_srcu_struct && brace_nesting == 0 &&
-                   paren_nesting == 0) {
-                       inside_srcu_struct = 0;
-                       update_fieldsep("", 0);
-                       for (name in rcu_batches)
-                               print "extern struct rcu_batch " name ";" > outputfile;
-               }
-
-               if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
-                       # Move rcu_batches outside of the struct.
-                       rcu_batches[$3] = "";
-                       shift_fields(3, 1);
-                       sub(/;[[:space:]]*$/, "", last_fs);
-                       continue;
-               }
-
-               if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
-                   $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
-                       inside_srcu_init_def = 1;
-                       srcu_init_param_name = $3;
-                       in_macro = 1;
-                       print_fields(3);
-                       continue;
-               }
-               if (inside_srcu_init_def && brace_nesting == 0 &&
-                   paren_nesting == 0) {
-                       inside_srcu_init_def = 0;
-                       in_macro = 0;
-                       continue;
-               }
-
-               if (inside_srcu_init_def && brace_nesting == 1 &&
-                   paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
-                   $1 ~ /^[[:alnum:]_]+$/) {
-                       name = $1;
-                       if (name in rcu_batches) {
-                               # Remove the dot.
-                               sub(/\.[[:space:]]*$/, "", last_fs);
-
-                               old_record = $0;
-                               do
-                                       shift_fields(1, 0);
-                               while (last_fs !~ /,/ || paren_nesting > 0);
-                               end_loc = length(old_record) - length($0);
-                               end_loc += index(last_fs, ",") - length(last_fs);
-
-                               last_fs = substr(last_fs, index(last_fs, ",") + 1);
-                               last_fs_print = 1;
-
-                               match(old_record, "^"name"("FS")+=");
-                               start_loc = RSTART + RLENGTH;
-
-                               len = end_loc - start_loc;
-                               initializer = substr(old_record, start_loc, len);
-                               gsub(srcu_init_param_name "\\.", "", initializer);
-                               rcu_batches[name] = initializer;
-                               continue;
-                       }
-               }
-
-               # Don't include a nonexistent file
-               if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
-                       update_fieldsep("", 0);
-                       next;
-               }
-
-               # Ignore most preprocessor stuff.
-               if (!in_macro && $1 ~ /#/) {
-                       break;
-               }
-
-               if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
-                       read_line();
-                       continue;
-               }
-               if (brace_nesting > 0 &&
-                   $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
-                   $2 in rcu_batches) {
-                       # Make uses of rcu_batches global. Somewhat unreliable.
-                       shift_fields(1, 0);
-                       print_fields(1);
-                       continue;
-               }
-
-               if ($1 == "static" && NF < 3) {
-                       read_line();
-                       continue;
-               }
-               if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
-                                      $2 == "void" && $3 == "srcu_flip")) {
-                       shift_fields(1, 1);
-                       print_fields(2);
-                       continue;
-               }
-
-               # Distinguish between read-side and write-side memory barriers.
-               if ($1 == "smp_mb" && NF < 2) {
-                       read_line();
-                       continue;
-               }
-               if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
-                       barrier_letter = substr($0, RLENGTH, 1);
-                       if (barrier_letter ~ /A|D/)
-                               new_barrier_name = "sync_smp_mb";
-                       else if (barrier_letter ~ /B|C/)
-                               new_barrier_name = "rs_smp_mb";
-                       else {
-                               print "Unrecognized memory barrier." > "/dev/null";
-                               exit 1;
-                       }
-
-                       shift_fields(1, 1);
-                       printf("%s", new_barrier_name) > outputfile;
-                       continue;
-               }
-
-               # Skip definition of rcu_synchronize, since it is already
-               # defined in misc.h. Only present in old versions of srcu.
-               if (brace_nesting == 0 && paren_nesting == 0 &&
-                   $1 == "struct" && $2 == "rcu_synchronize" &&
-                   $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
-                       shift_fields(2, 0);
-                       while (brace_nesting) {
-                               if (NF < 2)
-                                       read_line();
-                               shift_fields(1, 0);
-                       }
-               }
-
-               # Skip definition of wakeme_after_rcu for the same reason
-               if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
-                   $3 == "wakeme_after_rcu") {
-                       while (NF < 5)
-                               read_line();
-                       shift_fields(3, 0);
-                       do {
-                               while (NF < 3)
-                                       read_line();
-                               shift_fields(1, 0);
-                       } while (paren_nesting || brace_nesting);
-               }
-
-               if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
-                       read_line();
-                       continue;
-               }
-
-               # Give srcu_batches_completed the correct type for old SRCU.
-               if (brace_nesting == 0 && $1 == "long" &&
-                   $2 == "srcu_batches_completed") {
-                       update_fieldsep("", 0);
-                       printf("unsigned ") > outputfile;
-                       print_fields(2);
-                       continue;
-               }
-               if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
-                   $3 == "srcu_batches_completed") {
-                       print_fields(3);
-                       continue;
-               }
-
-               # Just print out the input code by default.
-               print_fields(1);
-       }
-       update_fieldsep("", 0);
-       print > outputfile;
-       next;
-}
-
-END {
-       update_fieldsep("", 0);
-
-       if (brace_nesting != 0) {
-               print "Unbalanced braces!" > "/dev/stderr";
-               exit 1;
-       }
-
-       # Define the rcu_batches
-       for (name in rcu_batches)
-               print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
-}
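
The END block above is plain string pasting: for every name captured in
rcu_batches[], it writes one global definition into c_output. Assuming the
parser captured an initializer such as RCU_BATCH_INIT(batch_queue) for a
field named batch_queue (names hypothetical), the emitted C would be:

    /* One line per entry of rcu_batches[], produced by the print above. */
    struct rcu_batch batch_queue = RCU_BATCH_INIT(batch_queue);
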
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
deleted file mode 100644 (file)
index 570a49d..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASSUME_H
-#define ASSUME_H
-
-/* Provide an assumption macro that can be disabled for gcc. */
-#ifdef RUN
-#define assume(x) \
-       do { \
-               /* Evaluate x to suppress warnings. */ \
-               (void) (x); \
-       } while (0)
-
-#else
-#define assume(x) __CPROVER_assume(x)
-#endif
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
deleted file mode 100644 (file)
index 3f95a76..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BARRIERS_H
-#define BARRIERS_H
-
-#define barrier() __asm__ __volatile__("" : : : "memory")
-
-#ifdef RUN
-#define smp_mb() __sync_synchronize()
-#define smp_mb__after_unlock_lock() __sync_synchronize()
-#else
-/*
- * Copied from CBMC's implementation of __sync_synchronize(), which
- * seems to be disabled by default.
- */
-#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
-                                "WWcumul", "RRcumul", "RWcumul", "WRcumul")
-#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
-                                   "WWcumul", "RRcumul", "RWcumul", "WRcumul")
-#endif
-
-/*
- * Allow memory barriers to be disabled in either the read or write side
- * of SRCU individually.
- */
-
-#ifndef NO_SYNC_SMP_MB
-#define sync_smp_mb() smp_mb()
-#else
-#define sync_smp_mb() do {} while (0)
-#endif
-
-#ifndef NO_READ_SIDE_SMP_MB
-#define rs_smp_mb() smp_mb()
-#else
-#define rs_smp_mb() do {} while (0)
-#endif
-
-#define READ_ONCE(x) (*(volatile typeof(x) *) &(x))
-#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
-
-#endif
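
The NO_SYNC_SMP_MB / NO_READ_SIDE_SMP_MB knobs exist so a run can
deliberately elide one class of barriers and confirm that CBMC then reports
a counterexample, demonstrating that the barrier is load-bearing. A minimal
message-passing sketch using the macros above (harness code hypothetical):

    #include <assert.h>

    int data, flag;

    void writer(void)
    {
            WRITE_ONCE(data, 1);
            sync_smp_mb();          /* elided when NO_SYNC_SMP_MB is defined */
            WRITE_ONCE(flag, 1);
    }

    void reader(void)
    {
            if (READ_ONCE(flag)) {
                    rs_smp_mb();    /* elided when NO_READ_SIDE_SMP_MB is defined */
                    assert(READ_ONCE(data) == 1);
            }
    }

With both barriers in place the assertion holds; define one of the knobs and
the reordering that the fence forbade may become reachable, depending on the
memory model passed to CBMC via -mm.
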
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
deleted file mode 100644 (file)
index 5e7912c..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BUG_ON_H
-#define BUG_ON_H
-
-#include <assert.h>
-
-#define BUG() assert(0)
-#define BUG_ON(x) assert(!(x))
-
-/* Does it make sense to treat warnings as errors? */
-#define WARN() BUG()
-#define WARN_ON(x) (BUG_ON(x), false)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
deleted file mode 100644 (file)
index e67ee5b..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-/* Include all source files. */
-
-#include "include_srcu.c"
-
-#include "preempt.c"
-#include "misc.c"
-
-/* Used by test.c files */
-#include <pthread.h>
-#include <stdlib.h>
-#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
deleted file mode 100644 (file)
index 283d710..0000000
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* "Cheater" definitions based on restricted Kconfig choices. */
-
-#undef CONFIG_TINY_RCU
-#undef __CHECKER__
-#undef CONFIG_DEBUG_LOCK_ALLOC
-#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-#undef CONFIG_HOTPLUG_CPU
-#undef CONFIG_MODULES
-#undef CONFIG_NO_HZ_FULL_SYSIDLE
-#undef CONFIG_PREEMPT_COUNT
-#undef CONFIG_PREEMPT_RCU
-#undef CONFIG_PROVE_RCU
-#undef CONFIG_RCU_NOCB_CPU
-#undef CONFIG_RCU_NOCB_CPU_ALL
-#undef CONFIG_RCU_STALL_COMMON
-#undef CONFIG_RCU_TRACE
-#undef CONFIG_RCU_USER_QS
-#undef CONFIG_TASKS_RCU
-#define CONFIG_TREE_RCU
-
-#define CONFIG_GENERIC_ATOMIC64
-
-#if NR_CPUS > 1
-#define CONFIG_SMP
-#else
-#undef CONFIG_SMP
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
deleted file mode 100644 (file)
index e5202d4..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include "int_typedefs.h"
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "locks.h"
-#include "misc.h"
-#include "preempt.h"
-#include "percpu.h"
-#include "workqueues.h"
-
-#ifdef USE_SIMPLE_SYNC_SRCU
-#define synchronize_srcu(sp) synchronize_srcu_original(sp)
-#endif
-
-#include <srcu.c>
-
-#ifdef USE_SIMPLE_SYNC_SRCU
-#undef synchronize_srcu
-
-#include "simple_sync_srcu.c"
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
deleted file mode 100644 (file)
index 0dd27aa..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef INT_TYPEDEFS_H
-#define INT_TYPEDEFS_H
-
-#include <inttypes.h>
-
-typedef int8_t s8;
-typedef uint8_t u8;
-typedef int16_t s16;
-typedef uint16_t u16;
-typedef int32_t s32;
-typedef uint32_t u32;
-typedef int64_t s64;
-typedef uint64_t u64;
-
-typedef int8_t __s8;
-typedef uint8_t __u8;
-typedef int16_t __s16;
-typedef uint16_t __u16;
-typedef int32_t __s32;
-typedef uint32_t __u32;
-typedef int64_t __s64;
-typedef uint64_t __u64;
-
-#define S8_C(x) INT8_C(x)
-#define U8_C(x) UINT8_C(x)
-#define S16_C(x) INT16_C(x)
-#define U16_C(x) UINT16_C(x)
-#define S32_C(x) INT32_C(x)
-#define U32_C(x) UINT32_C(x)
-#define S64_C(x) INT64_C(x)
-#define U64_C(x) UINT64_C(x)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
deleted file mode 100644 (file)
index 1e24827..0000000
+++ /dev/null
@@ -1,221 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LOCKS_H
-#define LOCKS_H
-
-#include <limits.h>
-#include <pthread.h>
-#include <stdbool.h>
-
-#include "assume.h"
-#include "bug_on.h"
-#include "preempt.h"
-
-int nondet_int(void);
-
-#define __acquire(x)
-#define __acquires(x)
-#define __release(x)
-#define __releases(x)
-
-/* Only use one lock mechanism. Select which one. */
-#ifdef PTHREAD_LOCK
-struct lock_impl {
-       pthread_mutex_t mutex;
-};
-
-static inline void lock_impl_lock(struct lock_impl *lock)
-{
-       BUG_ON(pthread_mutex_lock(&lock->mutex));
-}
-
-static inline void lock_impl_unlock(struct lock_impl *lock)
-{
-       BUG_ON(pthread_mutex_unlock(&lock->mutex));
-}
-
-static inline bool lock_impl_trylock(struct lock_impl *lock)
-{
-       int err = pthread_mutex_trylock(&lock->mutex);
-
-       if (!err)
-               return true;
-       else if (err == EBUSY)
-               return false;
-       BUG();
-}
-
-static inline void lock_impl_init(struct lock_impl *lock)
-{
-       pthread_mutex_init(&lock->mutex, NULL);
-}
-
-#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
-
-#else /* !defined(PTHREAD_LOCK) */
-/* Spinlock that assumes that it always gets the lock immediately. */
-
-struct lock_impl {
-       bool locked;
-};
-
-static inline bool lock_impl_trylock(struct lock_impl *lock)
-{
-#ifdef RUN
-       /* TODO: Should this be a test and set? */
-       return __sync_bool_compare_and_swap(&lock->locked, false, true);
-#else
-       __CPROVER_atomic_begin();
-       bool old_locked = lock->locked;
-       lock->locked = true;
-       __CPROVER_atomic_end();
-
-       /* Minimal barrier to prevent accesses leaking out of lock. */
-       __CPROVER_fence("RRfence", "RWfence");
-
-       return !old_locked;
-#endif
-}
-
-static inline void lock_impl_lock(struct lock_impl *lock)
-{
-       /*
-        * CBMC doesn't support busy waiting, so just assume that the
-        * lock is available.
-        */
-       assume(lock_impl_trylock(lock));
-
-       /*
-        * If the lock was already held by this thread then the assumption
-        * is unsatisfiable (deadlock).
-        */
-}
-
-static inline void lock_impl_unlock(struct lock_impl *lock)
-{
-#ifdef RUN
-       BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
-#else
-       /* Minimal barrier to prevent accesses leaking out of lock. */
-       __CPROVER_fence("RWfence", "WWfence");
-
-       __CPROVER_atomic_begin();
-       bool old_locked = lock->locked;
-       lock->locked = false;
-       __CPROVER_atomic_end();
-
-       BUG_ON(!old_locked);
-#endif
-}
-
-static inline void lock_impl_init(struct lock_impl *lock)
-{
-       lock->locked = false;
-}
-
-#define LOCK_IMPL_INITIALIZER {.locked = false}
-
-#endif /* !defined(PTHREAD_LOCK) */
-
-/*
- * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
- * locks of different types.
- */
-typedef struct {
-       struct lock_impl internal_lock;
-} spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
-#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
-#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
-       lock_impl_init(&lock->internal_lock);
-}
-
-static inline void spin_lock(spinlock_t *lock)
-{
-       /*
-        * Spin locks also need to be removed in order to eliminate all
-        * memory barriers. They are only used by the write side anyway.
-        */
-#ifndef NO_SYNC_SMP_MB
-       preempt_disable();
-       lock_impl_lock(&lock->internal_lock);
-#endif
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-#ifndef NO_SYNC_SMP_MB
-       lock_impl_unlock(&lock->internal_lock);
-       preempt_enable();
-#endif
-}
-
-/* Don't bother with interrupts */
-#define spin_lock_irq(lock) spin_lock(lock)
-#define spin_unlock_irq(lock) spin_unlock(lock)
-#define spin_lock_irqsave(lock, flags) spin_lock(lock)
-#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
-
-/*
- * This is supposed to return an int, but I think that a bool should work as
- * well.
- */
-static inline bool spin_trylock(spinlock_t *lock)
-{
-#ifndef NO_SYNC_SMP_MB
-       preempt_disable();
-       return lock_impl_trylock(&lock->internal_lock);
-#else
-       return true;
-#endif
-}
-
-struct completion {
-       /* Hopefully this won't overflow. */
-       unsigned int count;
-};
-
-#define COMPLETION_INITIALIZER(x) {.count = 0}
-#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
-#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
-
-static inline void init_completion(struct completion *c)
-{
-       c->count = 0;
-}
-
-static inline void wait_for_completion(struct completion *c)
-{
-       unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
-
-       assume(prev_count);
-}
-
-static inline void complete(struct completion *c)
-{
-       unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
-
-       BUG_ON(prev_count == UINT_MAX);
-}
-
-/* This function probably isn't very useful for CBMC. */
-static inline bool try_wait_for_completion(struct completion *c)
-{
-       BUG();
-}
-
-static inline bool completion_done(struct completion *c)
-{
-       return c->count;
-}
-
-/* TODO: Implement complete_all */
-static inline void complete_all(struct completion *c)
-{
-       BUG();
-}
-
-#endif
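
Completions are modeled with the same assume() trick as the spinlock above:
wait_for_completion() never blocks, it decrements the counter and discards
every execution in which the counter was still zero, so code after the call
is only explored in interleavings where complete() already ran. A sketch
(harness code hypothetical):

    static DECLARE_COMPLETION(done);

    void *worker(void *arg)
    {
            /* ... produce the result the waiter needs ... */
            complete(&done);                /* count 0 -> 1 */
            return NULL;
    }

    void waiter(void)
    {
            wait_for_completion(&done);     /* prunes paths where count was 0 */
            /* From CBMC's point of view, complete() has already happened. */
    }
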
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
deleted file mode 100644 (file)
index 9440cc3..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include "misc.h"
-#include "bug_on.h"
-
-struct rcu_head;
-
-void wakeme_after_rcu(struct rcu_head *head)
-{
-       BUG();
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
deleted file mode 100644 (file)
index aca5003..0000000
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef MISC_H
-#define MISC_H
-
-#include "assume.h"
-#include "int_typedefs.h"
-#include "locks.h"
-
-#include <linux/types.h>
-
-/* Probably won't need to deal with bottom halves. */
-static inline void local_bh_disable(void) {}
-static inline void local_bh_enable(void) {}
-
-#define MODULE_ALIAS(X)
-#define module_param(...)
-#define EXPORT_SYMBOL_GPL(x)
-
-#define container_of(ptr, type, member) ({                     \
-       const typeof(((type *)0)->member) *__mptr = (ptr);      \
-       (type *)((char *)__mptr - offsetof(type, member));      \
-})
-
-#ifndef USE_SIMPLE_SYNC_SRCU
-/* Abuse udelay to make sure that busy loops terminate. */
-#define udelay(x) assume(0)
-
-#else
-
-/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
-#define udelay(x) do { } while (0)
-#endif
-
-#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-       do { } while (0)
-
-#define notrace
-
-/* Avoid including rcupdate.h */
-struct rcu_synchronize {
-       struct rcu_head head;
-       struct completion completion;
-};
-
-void wakeme_after_rcu(struct rcu_head *head);
-
-#define rcu_lock_acquire(a) do { } while (0)
-#define rcu_lock_release(a) do { } while (0)
-#define rcu_lockdep_assert(c, s) do { } while (0)
-#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
-
-/* Let CBMC non-deterministically choose between normal and expedited. */
-bool rcu_gp_is_normal(void);
-bool rcu_gp_is_expedited(void);
-
-/* Do the same for old versions of rcu. */
-#define rcu_expedited (rcu_gp_is_expedited())
-
-#endif
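
Defining udelay() as assume(0) is what makes the retry loops in the SRCU code
tractable: any execution that would actually spin reaches the assumption and
is discarded, so CBMC only ever explores iterations that succeed without
waiting. Schematically (loop shape hypothetical, modeled on callers of
try_check_zero()):

    while (!try_check_zero(sp, idx, trycount))
            udelay(1);      /* assume(0): this path is abandoned, so the
                             * loop always terminates on a successful check */
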
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
deleted file mode 100644 (file)
index 27e67a3..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PERCPU_H
-#define PERCPU_H
-
-#include <stddef.h>
-#include "bug_on.h"
-#include "preempt.h"
-
-#define __percpu
-
-/* Maximum size of any percpu data. */
-#define PERCPU_OFFSET (4 * sizeof(long))
-
-/* Ignore alignment, as CBMC doesn't care about false sharing. */
-#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
-
-static inline void *__alloc_percpu(size_t size, size_t align)
-{
-       BUG();
-       return NULL;
-}
-
-static inline void free_percpu(void *ptr)
-{
-       BUG();
-}
-
-#define per_cpu_ptr(ptr, cpu) \
-       ((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
-
-#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
-#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
-#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
-
-#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
-#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
-#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
-
-/* Make CBMC use atomics to work around a bug. */
-#ifdef RUN
-#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
-#else
-/*
- * Split the atomic into a read and a write so that it has the least
- * possible ordering.
- */
-#define THIS_CPU_ADD_HELPER(ptr, x) \
-       do { \
-               typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
-               typeof(ptr) this_cpu_add_helper_x = (x); \
-               typeof(*ptr) this_cpu_add_helper_temp; \
-               __CPROVER_atomic_begin(); \
-               this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
-               __CPROVER_atomic_end(); \
-               this_cpu_add_helper_temp += this_cpu_add_helper_x; \
-               __CPROVER_atomic_begin(); \
-               *(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
-               __CPROVER_atomic_end(); \
-       } while (0)
-#endif
-
-/*
- * For some reason CBMC needs an atomic operation even though this is percpu
- * data.
- */
-#define __this_cpu_add(pcp, n) \
-       do { \
-               BUG_ON(preemptible()); \
-               THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
-                                   (typeof(pcp)) (n)); \
-       } while (0)
-
-#define this_cpu_add(pcp, n) \
-       do { \
-               int this_cpu_add_impl_cpu = get_cpu(); \
-               THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
-                                   (typeof(pcp)) (n)); \
-               put_cpu(); \
-       } while (0)
-
-/*
- * This will cause a compiler warning because of the cast from char[][] to
- * type*. This will cause a compile time error if type is too big.
- */
-#define DEFINE_PER_CPU(type, name) \
-       char name[NR_CPUS][PERCPU_OFFSET]; \
-       typedef char percpu_too_big_##name \
-               [sizeof(type) > PERCPU_OFFSET ? -1 : 1]
-
-#define for_each_possible_cpu(cpu) \
-       for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
-
-#endif
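
With alignment out of the picture, per-CPU data is just a flat block of
NR_CPUS slots of PERCPU_OFFSET bytes, and per_cpu_ptr() is raw pointer
arithmetic into it. A sketch of how the macros compose (types hypothetical):

    struct ref { long count; };
    struct ref __percpu *refs;  /* backing store: NR_CPUS * PERCPU_OFFSET bytes */

    void bump(void)
    {
            /* this_cpu_add() pins a CPU with get_cpu(), then per_cpu_ptr()
             * advances &refs->count by cpu * PERCPU_OFFSET bytes to reach
             * that CPU's private copy before the atomic add. */
            this_cpu_add(refs->count, 1);
    }
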
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
deleted file mode 100644 (file)
index b4083ae..0000000
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include "preempt.h"
-
-#include "assume.h"
-#include "locks.h"
-
-/* Support NR_CPUS of at most 64 */
-#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
-#define CPU_PREEMPTION_LOCKS_INIT1 \
-       CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
-#define CPU_PREEMPTION_LOCKS_INIT2 \
-       CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
-#define CPU_PREEMPTION_LOCKS_INIT3 \
-       CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
-#define CPU_PREEMPTION_LOCKS_INIT4 \
-       CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
-#define CPU_PREEMPTION_LOCKS_INIT5 \
-       CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
-
-/*
- * Simulate disabling preemption by locking a particular cpu. NR_CPUS
- * should be the actual number of cpus, not just the maximum.
- */
-struct lock_impl cpu_preemption_locks[NR_CPUS] = {
-       CPU_PREEMPTION_LOCKS_INIT0
-#if (NR_CPUS - 1) & 1
-       , CPU_PREEMPTION_LOCKS_INIT0
-#endif
-#if (NR_CPUS - 1) & 2
-       , CPU_PREEMPTION_LOCKS_INIT1
-#endif
-#if (NR_CPUS - 1) & 4
-       , CPU_PREEMPTION_LOCKS_INIT2
-#endif
-#if (NR_CPUS - 1) & 8
-       , CPU_PREEMPTION_LOCKS_INIT3
-#endif
-#if (NR_CPUS - 1) & 16
-       , CPU_PREEMPTION_LOCKS_INIT4
-#endif
-#if (NR_CPUS - 1) & 32
-       , CPU_PREEMPTION_LOCKS_INIT5
-#endif
-};
-
-#undef CPU_PREEMPTION_LOCKS_INIT0
-#undef CPU_PREEMPTION_LOCKS_INIT1
-#undef CPU_PREEMPTION_LOCKS_INIT2
-#undef CPU_PREEMPTION_LOCKS_INIT3
-#undef CPU_PREEMPTION_LOCKS_INIT4
-#undef CPU_PREEMPTION_LOCKS_INIT5
-
-__thread int thread_cpu_id;
-__thread int preempt_disable_count;
-
-void preempt_disable(void)
-{
-       BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
-
-       if (preempt_disable_count++)
-               return;
-
-       thread_cpu_id = nondet_int();
-       assume(thread_cpu_id >= 0);
-       assume(thread_cpu_id < NR_CPUS);
-       lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
-}
-
-void preempt_enable(void)
-{
-       BUG_ON(preempt_disable_count < 1);
-
-       if (--preempt_disable_count)
-               return;
-
-       lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
-}
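
The model makes the outermost preempt_disable() nondeterministically choose a
CPU and hold that CPU's lock until the count returns to zero, so at most one
thread can be non-preemptible on a given CPU at a time, matching the kernel
invariant the SRCU code relies on. The nesting behaves like this (sketch;
"counter" is a hypothetical per-CPU variable):

    preempt_disable();              /* picks thread_cpu_id, takes that CPU's lock */
    preempt_disable();              /* nested: only increments the count */
    __this_cpu_add(counter, 1);     /* safe: preemptible() is false here */
    preempt_enable();               /* still disabled */
    preempt_enable();               /* count hits zero: CPU lock released */
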
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
deleted file mode 100644 (file)
index f8b762c..0000000
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PREEMPT_H
-#define PREEMPT_H
-
-#include <stdbool.h>
-
-#include "bug_on.h"
-
-/* This flag contains garbage if preempt_disable_count is 0. */
-extern __thread int thread_cpu_id;
-
-/* Support recursive preemption disabling. */
-extern __thread int preempt_disable_count;
-
-void preempt_disable(void);
-void preempt_enable(void);
-
-static inline void preempt_disable_notrace(void)
-{
-       preempt_disable();
-}
-
-static inline void preempt_enable_no_resched(void)
-{
-       preempt_enable();
-}
-
-static inline void preempt_enable_notrace(void)
-{
-       preempt_enable();
-}
-
-static inline int preempt_count(void)
-{
-       return preempt_disable_count;
-}
-
-static inline bool preemptible(void)
-{
-       return !preempt_count();
-}
-
-static inline int get_cpu(void)
-{
-       preempt_disable();
-       return thread_cpu_id;
-}
-
-static inline void put_cpu(void)
-{
-       preempt_enable();
-}
-
-static inline void might_sleep(void)
-{
-       BUG_ON(preempt_disable_count);
-}
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
deleted file mode 100644 (file)
index 97f5920..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include "int_typedefs.h"
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "locks.h"
-#include "misc.h"
-#include "preempt.h"
-#include "percpu.h"
-#include "workqueues.h"
-
-#include <linux/srcu.h>
-
-/* Functions needed from modify_srcu.c */
-bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
-void srcu_flip(struct srcu_struct *sp);
-
-/* Simpler implementation of synchronize_srcu that ignores batching. */
-void synchronize_srcu(struct srcu_struct *sp)
-{
-       int idx;
-       /*
-        * This code assumes that try_check_zero will succeed anyway,
-        * so there is no point in multiple tries.
-        */
-       const int trycount = 1;
-
-       might_sleep();
-
-       /* Ignore the lock, as multiple writers aren't working yet anyway. */
-
-       idx = 1 ^ (sp->completed & 1);
-
-       /* For comments see srcu_advance_batches. */
-
-       assume(try_check_zero(sp, idx, trycount));
-
-       srcu_flip(sp);
-
-       assume(try_check_zero(sp, idx^1, trycount));
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
deleted file mode 100644 (file)
index 28b9603..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef WORKQUEUES_H
-#define WORKQUEUES_H
-
-#include <stdbool.h>
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "int_typedefs.h"
-
-#include <linux/types.h>
-
-/* Stub workqueue implementation. */
-
-struct work_struct;
-typedef void (*work_func_t)(struct work_struct *work);
-void delayed_work_timer_fn(unsigned long __data);
-
-struct work_struct {
-/*     atomic_long_t data; */
-       unsigned long data;
-
-       struct list_head entry;
-       work_func_t func;
-#ifdef CONFIG_LOCKDEP
-       struct lockdep_map lockdep_map;
-#endif
-};
-
-struct timer_list {
-       struct hlist_node       entry;
-       unsigned long           expires;
-       void                    (*function)(unsigned long);
-       unsigned long           data;
-       u32                     flags;
-       int                     slack;
-};
-
-struct delayed_work {
-       struct work_struct work;
-       struct timer_list timer;
-
-       /* target workqueue and CPU ->timer uses to queue ->work */
-       struct workqueue_struct *wq;
-       int cpu;
-};
-
-
-static inline bool schedule_work(struct work_struct *work)
-{
-       BUG();
-       return true;
-}
-
-static inline bool schedule_work_on(int cpu, struct work_struct *work)
-{
-       BUG();
-       return true;
-}
-
-static inline bool queue_work(struct workqueue_struct *wq,
-                             struct work_struct *work)
-{
-       BUG();
-       return true;
-}
-
-static inline bool queue_delayed_work(struct workqueue_struct *wq,
-                                     struct delayed_work *dwork,
-                                     unsigned long delay)
-{
-       BUG();
-       return true;
-}
-
-#define INIT_WORK(w, f) \
-       do { \
-               (w)->data = 0; \
-               (w)->func = (f); \
-       } while (0)
-
-#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
-
-#define __WORK_INITIALIZER(n, f) { \
-               .data = 0, \
-               .entry = { &(n).entry, &(n).entry }, \
-               .func = f \
-       }
-
-/* Don't bother initializing timer. */
-#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
-       .work = __WORK_INITIALIZER((n).work, (f)), \
-       }
-
-#define DECLARE_WORK(n, f) \
-       struct work_struct n = __WORK_INITIALIZER(n, f)
-
-#define DECLARE_DELAYED_WORK(n, f) \
-       struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
-
-#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
deleted file mode 100644 (file)
index d65462d..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-*.out
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
deleted file mode 100644 (file)
index ad21b92..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
-
-all:
-       for i in ./*.pass; do \
-               echo $$i ; \
-               CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
-       done
-       for i in ./*.fail; do \
-               echo $$i ; \
-               CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
-       done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
deleted file mode 100644 (file)
index 40c8075..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
deleted file mode 100644 (file)
index ada5baf..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
deleted file mode 100644 (file)
index 8fe00c8..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
deleted file mode 100644 (file)
index 612ed67..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
deleted file mode 100644 (file)
index 2ce2016..0000000
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <src/combined_source.c>
-
-int x;
-int y;
-
-int __unbuffered_tpr_x;
-int __unbuffered_tpr_y;
-
-DEFINE_SRCU(ss);
-
-void rcu_reader(void)
-{
-       int idx;
-
-#ifndef FORCE_FAILURE_3
-       idx = srcu_read_lock(&ss);
-#endif
-       might_sleep();
-
-       __unbuffered_tpr_y = READ_ONCE(y);
-#ifdef FORCE_FAILURE
-       srcu_read_unlock(&ss, idx);
-       idx = srcu_read_lock(&ss);
-#endif
-       WRITE_ONCE(x, 1);
-
-#ifndef FORCE_FAILURE_3
-       srcu_read_unlock(&ss, idx);
-#endif
-       might_sleep();
-}
-
-void *thread_update(void *arg)
-{
-       WRITE_ONCE(y, 1);
-#ifndef FORCE_FAILURE_2
-       synchronize_srcu(&ss);
-#endif
-       might_sleep();
-       __unbuffered_tpr_x = READ_ONCE(x);
-
-       return NULL;
-}
-
-void *thread_process_reader(void *arg)
-{
-       rcu_reader();
-
-       return NULL;
-}
-
-int main(int argc, char *argv[])
-{
-       pthread_t tu;
-       pthread_t tpr;
-
-       if (pthread_create(&tu, NULL, thread_update, NULL))
-               abort();
-       if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
-               abort();
-       if (pthread_join(tu, NULL))
-               abort();
-       if (pthread_join(tpr, NULL))
-               abort();
-       assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
-
-#ifdef ASSERT_END
-       assert(0);
-#endif
-
-       return 0;
-}
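
The final assertion is the store-buffering check: the updater stores y and,
after a grace period, loads x, while the reader loads y and then stores x
inside an SRCU read-side critical section. With a working synchronize_srcu(),
the outcome in which both sides read 0 is forbidden:

    /* (__unbuffered_tpr_x, __unbuffered_tpr_y) outcomes:
     *   (1, 1), (1, 0), (0, 1): all allowed, depending on interleaving
     *   (0, 0): forbidden - the reader seeing y == 0 means its read-side
     *           section began before the grace period ended, so its x = 1
     *           store must be visible when the updater reads x
     * The FORCE_FAILURE* variants break one of these preconditions so that
     * CBMC demonstrates the (0, 0) counterexample. */
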
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
deleted file mode 100755 (executable)
index 2fe1f03..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# This script expects a mode (either --should-pass or --should-fail) followed by
-# an input file. The script uses the following environment variables. The test C
-# source file is expected to be named test.c in the directory containing the
-# input file.
-#
-# CBMC: The command to run CBMC. Default: cbmc
-# CBMC_FLAGS: Additional flags to pass to CBMC
-# NR_CPUS: Number of cpus to run tests with. Default specified by the test
-# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
-#                 kernel: Version included in the linux kernel source.
-#                 simple: Use try_check_zero directly.
-#
-# The input file is a script that is sourced by this file. It can define any of
-# the following variables to configure the test.
-#
-# test_cbmc_options: Extra options to pass to CBMC.
-# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
-#                The test is expected to pass if it is run with fewer. (Only
-#                useful for .fail files)
-# default_cpus: Quantity of CPUs to use for the test, if not specified on the
-#               command line. Default: Larger of 2 and MIN_CPUS_FAIL.
-
-set -e
-
-if test "$#" -ne 2; then
-       echo "Expected one option followed by an input file" 1>&2
-       exit 99
-fi
-
-if test "x$1" = "x--should-pass"; then
-       should_pass="yes"
-elif test "x$1" = "x--should-fail"; then
-       should_pass="no"
-else
-       echo "Unrecognized argument '$1'" 1>&2
-
-       # Exit code 99 indicates a hard error.
-       exit 99
-fi
-
-CBMC=${CBMC:-cbmc}
-
-SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
-
-case ${SYNC_SRCU_MODE} in
-kernel) sync_srcu_mode_flags="" ;;
-simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
-
-*)
-       echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
-       exit 99
-       ;;
-esac
-
-min_cpus_fail=1
-
-c_file=`dirname "$2"`/test.c
-
-# Source the input file.
-. $2
-
-if test ${min_cpus_fail} -gt 2; then
-       default_default_cpus=${min_cpus_fail}
-else
-       default_default_cpus=2
-fi
-default_cpus=${default_cpus:-${default_default_cpus}}
-cpus=${NR_CPUS:-${default_cpus}}
-
-# Check if there are too few cpus to make the test fail.
-if test $cpus -lt ${min_cpus_fail:-0}; then
-       should_pass="yes"
-fi
-
-cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
-
-echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
-if ${CBMC} ${cbmc_opts} "${c_file}"; then
-       # Verification successful. Make sure that it was supposed to verify.
-       test "x${should_pass}" = xyes
-else
-       cbmc_exit_status=$?
-
-       # An exit status of 10 indicates a failed verification.
-       # (see cbmc_parse_optionst::do_bmc in the CBMC source code)
-       if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
-               :
-       else
-               echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
-
-               # Parse errors have exit status 6. Any other type of error
-               # should be considered a hard error.
-               if test ${cbmc_exit_status} -ne 6 && \
-                  test ${cbmc_exit_status} -ne 10; then
-                       exit 99
-               else
-                       exit 1
-               fi
-       fi
-fi
index 9dd629c..f4b3d5c 100644 (file)
@@ -43,7 +43,7 @@ run_tests: all
        done
 
 # Avoid any output on non-riscv targets for emit_tests
-emit_tests: all
+emit_tests:
        @for DIR in $(RISCV_SUBTARGETS); do                             \
                BUILD_TARGET=$(OUTPUT)/$$DIR;                   \
                $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;              \
index 5cbc392..2c0d2b1 100644 (file)
@@ -1,6 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0-only
-#include <sys/prctl.h>
-
 #define THIS_PROGRAM "./vstate_exec_nolibc"
 
 int main(int argc, char **argv)
index b357ba2..7a957c7 100644 (file)
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
 CLANG_FLAGS += -no-integrated-as
 endif
 
+top_srcdir = ../../../..
+
 CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
-         $(CLANG_FLAGS)
+         $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
 LDLIBS += -lpthread -ldl
 
 # Own dependencies because we only want to build against 1st prerequisite, but
index 4e4aa00..96e812b 100644 (file)
 #include <sys/auxv.h>
 #include <linux/auxvec.h>
 
+#include <linux/compiler.h>
+
 #include "../kselftest.h"
 #include "rseq.h"
 
-static const ptrdiff_t *libc_rseq_offset_p;
-static const unsigned int *libc_rseq_size_p;
-static const unsigned int *libc_rseq_flags_p;
+/*
+ * Define weak versions to play nice with binaries that are statically linked
+ * against a libc that doesn't support registering its own rseq.
+ */
+__weak ptrdiff_t __rseq_offset;
+__weak unsigned int __rseq_size;
+__weak unsigned int __rseq_flags;
+
+static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
+static const unsigned int *libc_rseq_size_p = &__rseq_size;
+static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
 
 /* Offset from the thread pointer to the rseq area. */
 ptrdiff_t rseq_offset;
@@ -155,9 +165,17 @@ unsigned int get_rseq_feature_size(void)
 static __attribute__((constructor))
 void rseq_init(void)
 {
-       libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
-       libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
-       libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+       /*
+        * If the libc's registered rseq size isn't already valid, it may be
+        * because the binary is dynamically linked and not necessarily due to
+        * libc not having registered a restartable sequence.  Try to find the
+        * symbols if that's the case.
+        */
+       if (!*libc_rseq_size_p) {
+               libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+               libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+               libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+       }
        if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
                        *libc_rseq_size_p != 0) {
                /* rseq registration owned by glibc */
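
The fix leans on ELF weak symbols: a weak definition is used at link time
only when no strong definition is present, so binaries built against glibc
2.35+ (which exports __rseq_offset, __rseq_size and __rseq_flags) pick up
libc's values, while older or static links fall back to the zero-initialized
locals and the existing dlsym() probe. Minimal illustration of the mechanism
(names hypothetical):

    /* libfoo.c - strong definition, analogous to glibc's */
    unsigned int foo_size = 32;

    /* main.c - weak fallback, analogous to the __weak declarations above */
    __attribute__((weak)) unsigned int foo_size;

    int have_strong_foo(void)
    {
            return foo_size != 0;   /* 1 if libfoo's definition won the link */
    }
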
index 43ec36b..38f6514 100644 (file)
@@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
 
 TEST(negative_ENOSYS)
 {
+#if defined(__arm__)
+       SKIP(return, "arm32 does not support calling syscall -1");
+#endif
        /*
         * There should be no difference between an "internal" skip
         * and userspace asking for syscall "-1".
@@ -3072,7 +3075,8 @@ TEST(syscall_restart)
                timeout.tv_sec = 1;
                errno = 0;
                EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
-                       TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+                       TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+                               errno, strerror(errno));
                }
 
                /* Read final sync from parent. */
@@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty)
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }
 
+       if (__NR_clone3 < 0)
+               SKIP(return, "Test not built with clone3 support");
+
        pid = sys_clone3(&args, sizeof(args));
        ASSERT_GE(pid, 0);
 
@@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded)
                TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
        }
 
+       if (__NR_clone3 < 0)
+               SKIP(return, "Test not built with clone3 support");
+
        pid = sys_clone3(&args, sizeof(args));
        ASSERT_GE(pid, 0);
 
@@ -4255,6 +4265,61 @@ TEST(user_notification_addfd_rlimit)
        close(memfd);
 }
 
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS  SECCOMP_IOW(4, __u64)
+#endif
+
+TEST(user_notification_sync)
+{
+       struct seccomp_notif req = {};
+       struct seccomp_notif_resp resp = {};
+       int status, listener;
+       pid_t pid;
+       long ret;
+
+       ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+       ASSERT_EQ(0, ret) {
+               TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+       }
+
+       listener = user_notif_syscall(__NR_getppid,
+                                     SECCOMP_FILTER_FLAG_NEW_LISTENER);
+       ASSERT_GE(listener, 0);
+
+       /* Try to set invalid flags. */
+       EXPECT_SYSCALL_RETURN(-EINVAL,
+               ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+       ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+                       SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+       pid = fork();
+       ASSERT_GE(pid, 0);
+       if (pid == 0) {
+               ret = syscall(__NR_getppid);
+               ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+                       _exit(1);
+               }
+               _exit(0);
+       }
+
+       req.pid = 0;
+       ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+       ASSERT_EQ(req.data.nr,  __NR_getppid);
+
+       resp.id = req.id;
+       resp.error = 0;
+       resp.val = USER_NOTIF_MAGIC;
+       resp.flags = 0;
+       ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+       ASSERT_EQ(waitpid(pid, &status, 0), pid);
+       ASSERT_EQ(status, 0);
+}
+
+
 /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
 FIXTURE(O_SUSPEND_SECCOMP) {
        pid_t pid;
index 6e73b09..7170619 100644 (file)
@@ -5,6 +5,8 @@ CONFIG_NF_CONNTRACK=m
 CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_CONNTRACK_ZONES=y
 CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_FLOW_TABLE=m
 CONFIG_NF_NAT=m
 CONFIG_NETFILTER_XT_TARGET_LOG=m
 
diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings
new file mode 100644 (file)
index 0000000..e220626
--- /dev/null
@@ -0,0 +1 @@
+timeout=900
index 147899a..976dffd 100644 (file)
             "$TC qdisc del dev $DUMMY handle 1: root",
             "$IP link del dev $DUMMY type dummy"
         ]
+    },
+    {
+        "id": "85ee",
+        "name": "QFQ with big MTU",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$IP link set dev $DUMMY mtu 2147483647 || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root qfq"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+        "expExitCode": "2",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class qfq 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "ddfa",
+        "name": "QFQ with small MTU",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$IP link set dev $DUMMY mtu 256 || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root qfq"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+        "expExitCode": "2",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class qfq 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5993",
+        "name": "QFQ with stab overhead greater than max packet len",
+        "category": [
+            "qdisc",
+            "qfq",
+            "scapy"
+        ],
+        "plugins": {
+            "requires": [
+                "nsPlugin",
+                "scapyPlugin"
+            ]
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$IP link set dev $DUMMY up || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: stab mtu 2048 tsize 512 mpu 0 overhead 999999999 linklayer ethernet root qfq",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+            "$TC qdisc add dev $DEV1 clsact",
+            "$TC filter add dev $DEV1 ingress protocol ip flower dst_ip 1.3.3.7/32 action mirred egress mirror dev $DUMMY"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: matchall classid 1:1",
+        "scapy": [
+            {
+                "iface": "$DEV0",
+                "count": 22,
+                "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='1.3.3.7')/TCP(sport=5000,dport=10)"
+            }
+        ],
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc ls dev $DUMMY",
+        "matchPattern": "dropped 22",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root qfq"
+        ]
     }
 ]
index a444553..08d4861 100644 (file)
         "teardown": [
             "echo \"1\" > /sys/bus/netdevsim/del_device"
         ]
+    },
+    {
+        "id": "3e1e",
+        "name": "Add taprio Qdisc with an invalid cycle-time",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true",
+            "$IP link set dev $ETH up",
+            "$IP addr add 10.10.10.10/24 dev $ETH"
+        ],
+        "cmdUnderTest": "/bin/true",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc taprio 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
     }
 ]
index 5beceee..6eba203 100644 (file)
@@ -129,8 +129,7 @@ int main(int argc, char **argv)
        printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
 
        /* Avg the two actual freq samples adjtimex gave us */
-       ppm = (tx1.freq + tx2.freq) * 1000 / 2;
-       ppm = (long long)tx1.freq * 1000;
+       ppm = (long long)(tx1.freq + tx2.freq) * 1000 / 2;
        ppm = shift_right(ppm, 16);
        printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
 
index d6979a4..91a4444 100644 (file)
@@ -217,6 +217,18 @@ TEST_F(user, matching) {
        /* Types don't match */
        TEST_NMATCH("__test_event u64 a; u64 b",
                    "__test_event u32 a; u32 b");
+
+       /* Struct name and size matches */
+       TEST_MATCH("__test_event struct my_struct a 20",
+                  "__test_event struct my_struct a 20");
+
+       /* Struct name don't match */
+       TEST_NMATCH("__test_event struct my_struct a 20",
+                   "__test_event struct my_struct b 20");
+
+       /* Struct size don't match */
+       TEST_NMATCH("__test_event struct my_struct a 20",
+                   "__test_event struct my_struct a 21");
 }
 
 int main(int argc, char **argv)
index 43a254f..21a98ba 100644 (file)
@@ -8,5 +8,5 @@ vsock_perf: vsock_perf.o
 CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
 .PHONY: all test clean
 clean:
-       ${RM} *.o *.d vsock_test vsock_diag_test
+       ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf
 -include *.d
index dfbaafb..5bbb561 100644 (file)
@@ -4035,8 +4035,17 @@ static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
                        sizeof(vcpu->stat), user_buffer, size, offset);
 }
 
+static int kvm_vcpu_stats_release(struct inode *inode, struct file *file)
+{
+       struct kvm_vcpu *vcpu = file->private_data;
+
+       kvm_put_kvm(vcpu->kvm);
+       return 0;
+}
+
 static const struct file_operations kvm_vcpu_stats_fops = {
        .read = kvm_vcpu_stats_read,
+       .release = kvm_vcpu_stats_release,
        .llseek = noop_llseek,
 };
 
@@ -4057,6 +4066,9 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
                put_unused_fd(fd);
                return PTR_ERR(file);
        }
+
+       kvm_get_kvm(vcpu->kvm);
+
        file->f_mode |= FMODE_PREAD;
        fd_install(fd, file);
 
@@ -4701,8 +4713,17 @@ static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
                                sizeof(kvm->stat), user_buffer, size, offset);
 }
 
+static int kvm_vm_stats_release(struct inode *inode, struct file *file)
+{
+       struct kvm *kvm = file->private_data;
+
+       kvm_put_kvm(kvm);
+       return 0;
+}
+
 static const struct file_operations kvm_vm_stats_fops = {
        .read = kvm_vm_stats_read,
+       .release = kvm_vm_stats_release,
        .llseek = noop_llseek,
 };
 
@@ -4721,6 +4742,9 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
                put_unused_fd(fd);
                return PTR_ERR(file);
        }
+
+       kvm_get_kvm(kvm);
+
        file->f_mode |= FMODE_PREAD;
        fd_install(fd, file);